diff --git a/res/A.png.save b/res/A.png.save deleted file mode 100644 index 5d41180..0000000 Binary files a/res/A.png.save and /dev/null differ diff --git a/res/C.png.save b/res/C.png.save deleted file mode 100644 index 7a818e5..0000000 Binary files a/res/C.png.save and /dev/null differ diff --git a/res/G.png.save b/res/G.png.save deleted file mode 100644 index 355b078..0000000 Binary files a/res/G.png.save and /dev/null differ diff --git a/res/T.png.save b/res/T.png.save deleted file mode 100644 index f69fa19..0000000 Binary files a/res/T.png.save and /dev/null differ diff --git a/scripts/10xgenomics_PBMC_5k/analysis_ctcf_motif_chr1.R b/scripts/10xgenomics_PBMC_5k/analysis_ctcf_motif_chr1.R deleted file mode 100644 index 711629c..0000000 --- a/scripts/10xgenomics_PBMC_5k/analysis_ctcf_motif_chr1.R +++ /dev/null @@ -1,223 +0,0 @@ -setwd(file.path("/", "local", "groux", "scATAC-seq")) - -# libraries -library(RColorBrewer) - -# functions -source(file.path("scripts", "functions.R")) - - -################## aggregations around CTCF motifs ################## - -# data -# open chromatin -data.open.1.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_open_bin1bp_fragment.mat"))) -data.open.2.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_open_bin2bp_fragment.mat"))) -data.open.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_open_bin10bp_fragment.mat"))) - -data.open.1.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_open_bin1bp_read.mat"))) -data.open.2.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_open_bin2bp_read.mat"))) -data.open.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_open_bin10bp_read.mat"))) - -data.open.1.atac = as.matrix(read.table(file.path("results", 
"10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_open_bin1bp_read_atac.mat"))) -data.open.2.atac = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_open_bin2bp_read_atac.mat"))) -data.open.10.atac = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_open_bin10bp_read_atac.mat"))) - -# mono-nucleosomes -data.1nucl.1.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_1nucl_bin1bp_fragment.mat"))) -data.1nucl.2.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_1nucl_bin2bp_fragment.mat"))) -data.1nucl.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_1nucl_bin10bp_fragment.mat"))) - -data.1nucl.1.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_1nucl_bin1bp_read.mat"))) -data.1nucl.2.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_1nucl_bin2bp_read.mat"))) -data.1nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_1nucl_bin10bp_read.mat"))) - -data.1nucl.1.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_1nucl_bin1bp_fragment_center.mat"))) -data.1nucl.2.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_1nucl_bin2bp_fragment_center.mat"))) -data.1nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_1nucl_bin10bp_fragment_center.mat"))) - -# di-nucleosomes -data.2nucl.1.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nucl_bin1bp_fragment.mat"))) -data.2nucl.2.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nucl_bin2bp_fragment.mat"))) -data.2nucl.10.frag = 
as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nucl_bin10bp_fragment.mat"))) - -data.2nucl.1.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nucl_bin1bp_read.mat"))) -data.2nucl.2.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nucl_bin2bp_read.mat"))) -data.2nucl.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nucl_bin10bp_read.mat"))) - -data.2nucl.1.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nucl_bin1bp_fragment_center.mat"))) -data.2nucl.2.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nucl_bin2bp_fragment_center.mat"))) -data.2nucl.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nucl_bin10bp_fragment_center.mat"))) - -# mono-nucleosomes from di-nucleosome data -data.nucls.1.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin1bp_fragment.mat"))) -data.nucls.2.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin2bp_fragment.mat"))) -data.nucls.10.frag = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin10bp_fragment.mat"))) - -data.nucls.1.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin1bp_read.mat"))) -data.nucls.2.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin2bp_read.mat"))) -data.nucls.10.read = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin10bp_read.mat"))) - -data.nucls.1.cent = as.matrix(read.table(file.path("results", 
"10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin1bp_fragment_center.mat"))) -data.nucls.2.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin2bp_fragment_center.mat"))) -data.nucls.10.cent = as.matrix(read.table(file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin10bp_fragment_center.mat"))) - - -# colors -col = brewer.pal(4, "Set1") - -# x-axis -axis.at.1 = seq(0, ncol(data.open.1.frag), length.out =5) -axis.lab.1 = seq(-400, 400, by=200) -axis.at.2 = seq(0, ncol(data.open.2.frag), length.out =5) -axis.lab.2 = seq(-400, 400, by=200) -axis.at.10 = seq(0, ncol(data.open.10.frag), length.out=5) -axis.lab.10 = seq(-1000, 1000, by=500) - -# X11(width=12, height=12) -png(filename=file.path("results/10xgenomics_PBMC_5k/ctcf_motifs_10e-6_chr1_aggregations.png"), - units="in", res=720, width=12, height=9) - m = matrix(nrow=4, ncol=4, - data=c(16,13,14,15, - 10, 1, 4, 7, - 11, 2, 5, 8, - 12, 3, 6, 9), byrow=T) - l = layout(mat=m, widths=c(0.2, 1, 1, 1), heights=c(0.2, 1, 1, 1)) - layout.show(l) - - p = par(mar=c(5.1, 5.1, 4.1, 2.1)) - - # 1bp resolution - ## entire fragments - plot(colMeans(data.open.1.frag), col=col[1], lwd=3, type='l', - main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n', - cex.axis=2, cex.lab=2) - lines(colMeans(data.open.1.frag), col=col[1], lwd=3) - lines(colMeans(data.1nucl.1.frag), col=col[2], lwd=3) - lines(colMeans(data.2nucl.1.frag), col=col[3], lwd=3) - lines(colMeans(data.nucls.1.frag), col=col[4], lwd=3) - axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8) - ## entire reads - plot(colMeans(data.open.1.read), col=col[1], lwd=3, type='l', - main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n', - cex.axis=2, cex.lab=2) - lines(colMeans(data.1nucl.1.read), col=col[2], lwd=3) - lines(colMeans(data.2nucl.1.read), col=col[3], lwd=3) - lines(colMeans(data.nucls.1.read), col=col[4], lwd=3) - axis(side=1, at=axis.at.1, 
labels=axis.lab.1, cex.axis=1.8) - ## atac reads and centers - plot(colMeans(data.open.1.atac)/max(colMeans(data.open.1.atac)), - col=col[1], lwd=3, type='l', xaxt='n', - main="", xlab="pos[bp]", ylab="Prop max signal", - cex.axis=2, cex.lab=2) - lines(colMeans(data.1nucl.1.cent)/max(colMeans(data.1nucl.1.cent)), - col=col[2], lwd=3) - lines(colMeans(data.2nucl.1.cent)/max(colMeans(data.2nucl.1.cent)), - col=col[3], lwd=3) - lines(colMeans(data.nucls.1.cent)/max(colMeans(data.nucls.1.cent)), - col=col[4], lwd=3) - axis(side=1, at=axis.at.1, labels=axis.lab.1, cex.axis=1.8) - - # 2bp resolution - ## entire fragments - plot(colMeans(data.open.2.frag), col=col[1], lwd=3, type='l', - main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n', - cex.axis=2, cex.lab=2) - lines(colMeans(data.1nucl.2.frag), col=col[2], lwd=3) - lines(colMeans(data.2nucl.2.frag), col=col[3], lwd=3) - lines(colMeans(data.nucls.2.frag), col=col[4], lwd=3) - axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8) - ## entire reads - plot(colMeans(data.open.2.read), col=col[1], lwd=3, type='l', - main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n', - cex.axis=2, cex.lab=2) - lines(colMeans(data.1nucl.2.read), col=col[2], lwd=3) - lines(colMeans(data.2nucl.2.read), col=col[3], lwd=3) - lines(colMeans(data.nucls.2.read), col=col[4], lwd=3) - axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8) - ## atac reads and centers - plot(colMeans(data.open.2.atac)/max(colMeans(data.open.2.atac)), - col=col[1], lwd=3, type='l', xaxt='n', - main="", xlab="pos[bp]", ylab="Prop max signal", - cex.axis=2, cex.lab=2) - lines(colMeans(data.1nucl.2.cent)/max(colMeans(data.1nucl.2.cent)), - col=col[2], lwd=3) - lines(colMeans(data.2nucl.2.cent)/max(colMeans(data.2nucl.2.cent)), - col=col[3], lwd=3) - lines(colMeans(data.nucls.2.cent)/max(colMeans(data.nucls.2.cent)), - col=col[4], lwd=3) - axis(side=1, at=axis.at.2, labels=axis.lab.2, cex.axis=1.8) - - # 10bp resolution - ## entire fragments - 
plot(colMeans(data.open.10.frag), col=col[1], lwd=3, type='l', - main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n', - cex.axis=2, cex.lab=2) - lines(colMeans(data.1nucl.10.frag), col=col[2], lwd=3) - lines(colMeans(data.2nucl.10.frag), col=col[3], lwd=3) - lines(colMeans(data.nucls.10.frag), col=col[4], lwd=3) - axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8) - ## entire reads - plot(colMeans(data.open.10.read), col=col[1], lwd=3, type='l', - main="", xlab="pos[bp]", ylab="Nb of reads", xaxt='n', - cex.axis=2, cex.lab=2) - lines(colMeans(data.1nucl.10.read), col=col[2], lwd=3) - lines(colMeans(data.2nucl.10.read), col=col[3], lwd=3) - lines(colMeans(data.nucls.10.read), col=col[4], lwd=3) - axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8) - ## atac reads and centers - plot(colMeans(data.open.10.atac)/max(colMeans(data.open.10.atac)), - col=col[1], lwd=3, type='l', xaxt='n', - main="", xlab="pos[bp]", ylab="Prop max signal", - cex.axis=2, cex.lab=2) - lines(colMeans(data.1nucl.10.cent)/max(colMeans(data.1nucl.10.cent)), - col=col[2], lwd=3) - lines(colMeans(data.2nucl.10.cent)/max(colMeans(data.2nucl.10.cent)), - col=col[3], lwd=3) - lines(colMeans(data.nucls.10.cent)/max(colMeans(data.nucls.10.cent)), - col=col[4], lwd=3) - axis(side=1, at=axis.at.10, labels=axis.lab.10, cex.axis=1.8) - - # some legends over the rows and columns - p = par(mar=c(0,0,0,0)) - plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n') - text(0, 0, labels="FRAGMENTS", cex=2, srt=90) - - plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n') - text(0, 0, labels="READS", cex=2, srt=90) - - plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n') - text(0, 0, labels="EDGES/CENTERS", cex=2, srt=90) - - plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n') - text(0, 0, labels="+/-400bp by 1bp", cex=2) - - plot(0, 0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n') - text(0, 0, labels="+/-400bp by 2bp", cex=2) - - plot(0, 
0, col=0, main="", xlab="", ylab="", xaxt='n', yaxt='n') - text(0, 0, labels="+/-1kp by 10bp", cex=2) - - par(p) -dev.off() - - - -# footprint -# x-axis -axis.at.fp = seq(0, 200, length.out=3) -axis.lab.fp = seq(-100, 100, by=100) - - -# X11(width=8, height=4) -png(filename=file.path("results", "10xgenomics_PBMC_5k", "ctcf_motifs_10e-6_chr1_footprint.png"), - units="in", res=720, width=8, height=4) - p = par(mar=c(5.1, 5.1, 4.1, 2.1)) - plot(colMeans(data.open.1.atac[,300:500]), type='l', lwd=3, col=col[1], - main="CTCF motif", xlab="pos[bp]", ylab="Nb of reads", xaxt='n', - cex.axis=2, cex.lab=2) - abline(v=90, lwd=3, lty=2) - abline(v=110, lwd=3, lty=2) - axis(side=1, at=axis.at.fp, labels=axis.lab.fp, cex.axis=1.8) - par(p) -dev.off() diff --git a/scripts/10xgenomics_PBMC_5k/analysis_ctcf_motif_chr1.sh b/scripts/10xgenomics_PBMC_5k/analysis_ctcf_motif_chr1.sh deleted file mode 100755 index 6c21352..0000000 --- a/scripts/10xgenomics_PBMC_5k/analysis_ctcf_motif_chr1.sh +++ /dev/null @@ -1,67 +0,0 @@ -# some paths -## directories -results_dir='results/10xgenomics_PBMC_5k' -data_dir='data/10xgenomics_PBMC_5k/' -## input -file_bed=$data_dir'/ctcf_motifs_10e-6_chr1.bed' -file_bam_open="$data_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam" -file_bai_open="$data_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai" -file_bam_1nucl="$data_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam" -file_bai_1nucl="$data_dir/atac_v1_pbmc_5k_possorted_filtered_133-266bp.bam.bai" -file_bam_2nucl="$data_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam" -file_bai_2nucl="$data_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp.bam.bai" -file_bam_1nucl2="$data_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam" -file_bai_1nucl2="$data_dir/atac_v1_pbmc_5k_possorted_filtered_341-500bp_splitintwo.bam.bai" - -mkdir -p $results_dir - -# matrix creation -## open chromatin around CTCF motif -for method in 'read' 'read_atac' 'fragment' -do - 
file_mat_open_1="$results_dir/ctcf_motifs_10e-6_chr1_open_bin1bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400 --binSize 1 --method $method > $file_mat_open_1 - file_mat_open_2="$results_dir/ctcf_motifs_10e-6_chr1_open_bin2bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -400 --to 400 --binSize 2 --method $method > $file_mat_open_2 - file_mat_open_10="$results_dir/ctcf_motifs_10e-6_chr1_open_bin10bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_open_10 -done - -## mono around CTCF motif -for method in 'read' 'fragment' 'fragment_center' -do - ### mono nucleosomes - file_mat_1nucl_1="$results_dir/ctcf_motifs_10e-6_chr1_1nucl_bin1bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400 --binSize 1 --method $method > $file_mat_1nucl_1 - file_mat_1nucl_2="$results_dir/ctcf_motifs_10e-6_chr1_1nucl_bin2bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -400 --to 400 --binSize 2 --method $method > $file_mat_1nucl_2 - file_mat_1nucl_10="$results_dir/ctcf_motifs_10e-6_chr1_1nucl_bin10bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl --bai $file_bai_1nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10 -done - - -## di nucleosomes around CTCF motif -for method in 'read' 'fragment' 'fragment_center' -do - ### di nucleosomes - file_mat_2nucl_1="$results_dir/ctcf_motifs_10e-6_chr1_2nucl_bin1bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400 --binSize 1 --method $method > $file_mat_2nucl_1 - 
file_mat_2nucl_2="$results_dir/ctcf_motifs_10e-6_chr1_2nucl_bin2bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -400 --to 400 --binSize 2 --method $method > $file_mat_2nucl_2 - file_mat_2nucl_10="$results_dir/ctcf_motifs_10e-6_chr1_2nucl_bin10bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_2nucl --bai $file_bai_2nucl --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_2nucl_10 -done - - -## mono nucleosomes from processed di-nucleosome data around CTCF motif -for method in 'read' 'fragment' 'fragment_center' -do - ### mono nucleosomes - file_mat_1nucl_1="$results_dir/ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin1bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400 --binSize 1 --method $method > $file_mat_1nucl_1 - file_mat_1nucl_2="$results_dir/ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin2bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -400 --to 400 --binSize 2 --method $method > $file_mat_1nucl_2 - file_mat_1nucl_10="$results_dir/ctcf_motifs_10e-6_chr1_2nuclsplitintwo_bin10bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_1nucl2 --bai $file_bai_1nucl2 --from -1000 --to 1000 --binSize 10 --method $method > $file_mat_1nucl_10 -done - diff --git a/scripts/10xgenomics_PBMC_5k/analysis_peaks.sh b/scripts/10xgenomics_PBMC_5k/analysis_peaks.sh index 4979f3a..2c95a12 100755 --- a/scripts/10xgenomics_PBMC_5k/analysis_peaks.sh +++ b/scripts/10xgenomics_PBMC_5k/analysis_peaks.sh @@ -1,38 +1,68 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k' data_dir='data' read_dir="$data_dir/10xgenomics_PBMC_5k" seq_dir="$data_dir/genomes" -## input1 +## input file_bed=$read_dir'/atac_v1_pbmc_5k_peaks.bed' +file_bed_rmsk=$read_dir'/atac_v1_pbmc_5k_peaks_rmsk.bed' 
file_bam_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam" file_bai_open="$read_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai" file_bam_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam" file_bai_nucl="$read_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai" file_hg19="$seq_dir/hg19.fasta" +file_rmsk="$seq_dir/hg19_rmsk.bed" mkdir -p $results_dir +# repeat mask +# remove any peak that has at least 50% of its length overlapping a repeated region (its +# center is inside the region, this is somewhat equivalent to what is done on ccg webinterface +# when checking the repeatMask on option) +bin/bedtools/subtractBed -f 0.5 -A -a data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_peaks.bed -b data/genomes/hg19_rmsk.bed > $file_bed_rmsk + + +# sampled from bed +file_bed_rmsk_2=$read_dir'/atac_v1_pbmc_5k_peaks_rmsk_sampled.bed' +shuf $file_bed_rmsk | head -n 10000 > $file_bed_rmsk_2 + # matrix creation -## sequences -file_mat_seq="$results_dir/peaks_sequences.mat" -bin/SequenceMatrixCreator --bed $file_bed --fasta $file_hg19 --from -500 --to 500 > $file_mat_seq +## 1kb sequences +file_mat_seq_1kb_1="$results_dir/peaks_rmsk_sequences_1kb.mat" +file_mat_seq_1kb_2="$results_dir/peaks_rmsk_sampled_sequences_1kb.mat" +bin/SequenceMatrixCreator --bed $file_bed_rmsk --fasta $file_hg19 --from -500 --to 500 > $file_mat_seq_1kb_1 +bin/SequenceMatrixCreator --bed $file_bed_rmsk_2 --fasta $file_hg19 --from -500 --to 500 > $file_mat_seq_1kb_2 +## 2kb sequences +file_mat_seq_2kb_1="$results_dir/peaks_rmsk_sequences_2kb.mat" +file_mat_seq_2kb_2="$results_dir/peaks_rmsk_sampled_sequences_2kb.mat" +bin/SequenceMatrixCreator --bed $file_bed_rmsk --fasta $file_hg19 --from -1000 --to 1000 > $file_mat_seq_2kb_1 +bin/SequenceMatrixCreator --bed $file_bed_rmsk_2 --fasta $file_hg19 --from -1000 --to 1000 > $file_mat_seq_2kb_2 ## open chromatin around peaks for method in 'read_atac' do - file_mat_open_1="$results_dir/peaks_open_bin1bp_$method.mat" - 
bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_open --bai $file_bai_open --from -500 --to 500 --binSize 1 --method $method > $file_mat_open_1 + file_mat_open_1kb_1="$results_dir/peaks_rmsk_open_bin1bp_1kb_$method.mat" + file_mat_open_1kb_2="$results_dir/peaks_rmsk_sampled_open_bin1bp_1kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk --bam $file_bam_open --bai $file_bai_open --from -500 --to 500 --binSize 1 --method $method > $file_mat_open_1kb_1 + bin/CorrelationMatrixCreator --bed $file_bed_rmsk_2 --bam $file_bam_open --bai $file_bai_open --from -500 --to 500 --binSize 1 --method $method > $file_mat_open_1kb_2 + file_mat_open_2kb_1="$results_dir/peaks_rmsk_open_bin1bp_2kb_$method.mat" + file_mat_open_2kb_2="$results_dir/peaks_rmsk_sampled_open_bin1bp_2kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk --bam $file_bam_open --bai $file_bai_open --from -1000 --to 1000 --binSize 1 --method $method > $file_mat_open_2kb_1 + bin/CorrelationMatrixCreator --bed $file_bed_rmsk_2 --bam $file_bam_open --bai $file_bai_open --from -1000 --to 1000 --binSize 1 --method $method > $file_mat_open_2kb_2 done - ## all nucleosomes around peaks for method in 'fragment_center' do - ### mono nucleosomes - file_mat_nucl_1="$results_dir/peaks_nucleosomes_bin1bp_$method.mat" - bin/CorrelationMatrixCreator --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --from -500 --to 500 --binSize 1 --method $method > $file_mat_nucl_1 + file_mat_nucl_1kb_1="$results_dir/peaks_rmsk_nucleosomes_bin1bp_1kb_$method.mat" + file_mat_nucl_1kb_2="$results_dir/peaks_rmsk_sampled_nucleosomes_bin1bp_1kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk --bam $file_bam_nucl --bai $file_bai_nucl --from -500 --to 500 --binSize 1 --method $method > $file_mat_nucl_1kb_1 + bin/CorrelationMatrixCreator --bed $file_bed_rmsk_2 --bam $file_bam_nucl --bai $file_bai_nucl --from -500 --to 500 --binSize 1 --method $method > $file_mat_nucl_1kb_2 + 
file_mat_nucl_2kb_1="$results_dir/peaks_rmsk_nucleosomes_bin1bp_2kb_$method.mat" + file_mat_nucl_2kb_2="$results_dir/peaks_rmsk_sampled_nucleosomes_bin1bp_2kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk --bam $file_bam_nucl --bai $file_bai_nucl --from -1000 --to 1000 --binSize 1 --method $method > $file_mat_nucl_2kb_1 + bin/CorrelationMatrixCreator --bed $file_bed_rmsk_2 --bam $file_bam_nucl --bai $file_bai_nucl --from -1000 --to 1000 --binSize 1 --method $method > $file_mat_nucl_2kb_2 done diff --git a/scripts/10xgenomics_PBMC_5k/process_data.sh b/scripts/10xgenomics_PBMC_5k/process_data.sh index 755fe66..2f11be8 100755 --- a/scripts/10xgenomics_PBMC_5k/process_data.sh +++ b/scripts/10xgenomics_PBMC_5k/process_data.sh @@ -1,19 +1,17 @@ mkdir -p data/10xgenomics_PBMC_5k # download 10xGenomics 5k PBMC ss-ATAC-seq dataset wget -O data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_possorted.bam http://s3-us-west-2.amazonaws.com/10x.files/samples/cell-atac/1.0.1/atac_v1_pbmc_5k/atac_v1_pbmc_5k_possorted_bam.bam # download some barecode informations wget -O data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_singlecell.csv http://cf.10xgenomics.com/samples/cell-atac/1.0.1/atac_v1_pbmc_5k/atac_v1_pbmc_5k_singlecell.csv # download their peaks wget -O data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_peaks.bed http://cf.10xgenomics.com/samples/cell-atac/1.0.1/atac_v1_pbmc_5k/atac_v1_pbmc_5k_peaks.bed sed -E s/^\([0-9XY]+\)/chr\\1/ data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_peaks.bed | grep -E ^chr | sort -k 1,1V -k2,2n -k3,3n > data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_peaks_sort.bed mv data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_peaks_sort.bed data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_peaks.bed -# get only peaks on chr1 -grep -E '^chr1[[:space:]]' data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_peaks.bed > data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_peaks_chr1.bed # extract the barecodes corresponding to cells, based on 10XGenomics analysis grep -E _cell_[0-9]+ 
data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_singlecell.csv | cut -d ',' -f 1 > data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_barcodes.txt # filter out reads which do not have a proper barcode python3.6 scripts/bam_tools/filter_bam.py -i data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_possorted.bam --tag CB --values data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_barcodes.txt -o data/10xgenomics_PBMC_5k/atac_v1_pbmc_5k_possorted_filtered.bam diff --git a/scripts/10xgenomics_PBMC_5k/run_all.sh b/scripts/10xgenomics_PBMC_5k/run_all.sh index e961146..0d10fa5 100755 --- a/scripts/10xgenomics_PBMC_5k/run_all.sh +++ b/scripts/10xgenomics_PBMC_5k/run_all.sh @@ -1,11 +1,6 @@ # download the data, filter them and split by fragment size mkdir -p data/10xgenomics_PBMC_5k scripts/10xgenomics_PBMC_5k/process_data.sh scripts/10xgenomics_PBMC_5k/split_by_size.sh - -# analyse chromosome 1 -scripts/10xgenomics_PBMC_5k/analysis_chr1.sh -Rscript scripts scripts/10xgenomics_PBMC_5k/analysis_chr1.R - diff --git a/scripts/10xgenomics_PBMC_5k_classification_1/classification_ctcf_motif.R b/scripts/10xgenomics_PBMC_5k_classification_1/classification_ctcf_motif.R index 1685922..8df2587 100644 --- a/scripts/10xgenomics_PBMC_5k_classification_1/classification_ctcf_motif.R +++ b/scripts/10xgenomics_PBMC_5k_classification_1/classification_ctcf_motif.R @@ -1,96 +1,172 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) library(seqLogo) # functions source(file.path("scripts", "functions.R")) # the minimum number of classes searched k.min = 1 # the maximum number of classes searched k.max = 10 # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") -################## sequence patterns around ctcf motifs ################## +################## open chromatin patterns around ctcf motifs ################## for(k in k.min:k.max) { # open chromatin data = 
read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_model.mat", k))) model.open = data$models model.prob = data$prob data = NULL # nucleosomes model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", sprintf("ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_%dclass_model.mat", k)))$models # sequence model.seq = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_sequences_model.mat", k)))$models # plot classes col = brewer.pal(3, "Set1") # X11(width=17, height=10) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_1", sprintf("ctcf_motifs_10e-6_classification_open_bin1bp_%dclass.png", k)), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) x.at = (x.lab + ncol(ref.open)) / 2 axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal x.at = seq(0, 1, 0.5) axis(2, at=x.at, labels=x.at) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) } row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(0.2))+0.05 top = bottom + 0.15 par(fig=c(left, right, bottom, top), new=T) idx = 380:420 
plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) # xaxis x.at = 1:length(idx) axis(1, at=x.at, labels=x.at) # yaxis x.at = seq(0, 2, by=1) axis(2, at=x.at, labels=x.at) row_n = row_n + 1 if(i %% 5 == 0) { col_n = col_n + 1 row_n = 1 } } dev.off() } + +################## nucleosomes chromatin patterns around ctcf motifs ################## + +for(k in k.min:k.max) +{ + # open chromatin + data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", + sprintf("ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_%dclass_open_read_atac_model.mat", k))) + model.open = data$models + model.prob = data$prob + data = NULL + # nucleosomes + model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", + sprintf("ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_%dclass_model.mat", k)))$models + # sequence + model.seq = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_classification_1", + sprintf("ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_%dclass_sequences_model.mat", k)))$models + + # plot classes + col = brewer.pal(3, "Set1") + # X11(width=17, height=10) + png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_1", + sprintf("ctcf_motifs_10e-6_classification_1nucl_bin1bp_%dclass.png", k)), + units="in", res=720, width=18, height=12) + m = matrix(1:10, nrow=5, ncol=2, byrow=F) + layout(m) + # order from most to least probable class + ord = order(model.prob, decreasing=T) + ref.open = model.open[ord,, drop=F] + ref.nucl = model.nucl[ord,, drop=F] + ref.seq = model.seq[,,ord, drop=F] + prob = model.prob[ord] + class = c(1:nrow(ref.open))[ord] + for(i in 1:nrow(ref.open)) + { # plot logo + plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, + main=sprintf("class %d (p=%.2f)", class[i], prob[i])) 
+ # x-axis + x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) + x.at = (x.lab + ncol(ref.open)) / 2 + axis(1, at=x.at, labels=x.lab) + # y-axis is [0,1] for min/max signal + x.at = seq(0, 1, 0.5) + axis(2, at=x.at, labels=x.at) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) + } + row_n = 1 # row counter + col_n = 1 # column counter + for(i in 1:nrow(ref.open)) + { # plot logo center + right = 0.5*col_n - 0.01 + left = right - 0.2 + bottom = 1-(row_n*(0.2))+0.05 + top = bottom + 0.15 + par(fig=c(left, right, bottom, top), new=T) + idx = 380:420 + plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) + # xaxis + x.at = 1:length(idx) + axis(1, at=x.at, labels=x.at) + # yaxis + x.at = seq(0, 2, by=1) + axis(2, at=x.at, labels=x.at) + row_n = row_n + 1 + if(i %% 5 == 0) + { col_n = col_n + 1 + row_n = 1 + } + } + dev.off() +} diff --git a/scripts/10xgenomics_PBMC_5k_classification_1/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_1/classification_ctcf_motif.sh index 6b59c2f..da45b83 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_1/classification_ctcf_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_1/classification_ctcf_motif.sh @@ -1,53 +1,52 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_1' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat" ## file with seeds file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt' mkdir -p 
$results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=3 +n_core=8 # open chromatin for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_prob.mat4d' file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_1nucl_fragment_center_model.mat' file_mod3=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_sequences_model.mat' file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMRead --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done # 1nucl chromatin for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d' file_mod1=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_open_read_atac_model.mat' 
file_mod3=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_sequences_model.mat' file_aic=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMRead --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done diff --git a/scripts/10xgenomics_PBMC_5k_classification_1/classification_ebf1_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_1/classification_ebf1_motif.sh index 5f7872e..931e907 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_1/classification_ebf1_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_1/classification_ebf1_motif.sh @@ -1,53 +1,52 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_1' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/ebf1_motifs_10e-6_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/ebf1_motifs_10e-6_sequences.mat" ## file with seeds file_seed=$results_dir'/ebf1_motifs_10e-6_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=3 +n_core=8 # open chromatin for k in 1 2 3 4 5 6 7 8 9 10 do 
seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_prob.mat4d' file_mod1=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_1nucl_fragment_center_model.mat' file_mod3=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_sequences_model.mat' file_aic=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMRead --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done # 1nucl chromatin for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d' file_mod1=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod2=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_open_read_atac_model.mat' file_mod3=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_sequences_model.mat' file_aic=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_aic.txt' echo "$file_prob 
$seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMRead --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done diff --git a/scripts/10xgenomics_PBMC_5k_classification_1/classification_myc_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_1/classification_myc_motif.sh index 801a070..e95bb7e 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_1/classification_myc_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_1/classification_myc_motif.sh @@ -1,53 +1,53 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_1' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/myc_motifs_10e-6_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/myc_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/myc_motifs_10e-6_sequences.mat" ## file with seeds file_seed=$results_dir'/myc_motifs_10e-6_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=3 +n_core=8 # open chromatin for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_prob.mat4d' 
file_mod1=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_1nucl_fragment_center_model.mat' file_mod3=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_sequences_model.mat' file_aic=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMRead --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done # 1nucl chromatin for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d' file_mod1=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod2=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_open_read_atac_model.mat' file_mod3=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_sequences_model.mat' file_aic=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread 
$n_core > $file_prob - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --seq $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMRead --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done + diff --git a/scripts/10xgenomics_PBMC_5k_classification_1/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_1/classification_sp1_motif.sh index 8e34470..c7ca927 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_1/classification_sp1_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_1/classification_sp1_motif.sh @@ -1,53 +1,52 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_1' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/sp1_motifs_10e-7_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/sp1_motifs_10e-7_sequences.mat" ## file with seeds file_seed=$results_dir'/sp1_motifs_10e-7_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=3 +n_core=8 # open chromatin for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_prob.mat4d' file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_1nucl_fragment_center_model.mat' 
file_mod3=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_sequences_model.mat' file_aic=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMRead --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done # 1nucl chromatin for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d' file_mod1=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod2=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_open_read_atac_model.mat' file_mod3=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_sequences_model.mat' file_aic=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> 
$file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMRead --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done diff --git a/scripts/10xgenomics_PBMC_5k_classification_2/classification_ctcf_motif.R b/scripts/10xgenomics_PBMC_5k_classification_2/classification_ctcf_motif.R index a8f1f35..193ff0b 100644 --- a/scripts/10xgenomics_PBMC_5k_classification_2/classification_ctcf_motif.R +++ b/scripts/10xgenomics_PBMC_5k_classification_2/classification_ctcf_motif.R @@ -1,96 +1,98 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) library(seqLogo) # functions source(file.path("scripts", "functions.R")) # the minimum number of classes searched k.min = 1 # the maximum number of classes searched k.max = 10 # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") ################## sequence patterns around ctcf motifs ################## for(k in k.min:k.max) { # open chromatin data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_model.mat", k))) model.open = data$models model.prob = data$prob data = NULL # nucleosomes model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_%dclass_model.mat", k)))$models # sequence model.seq = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("ctcf_motifs_10e-6_sequences_%dclass_model.mat", k)))$models # plot 
classes col = brewer.pal(3, "Set1") # X11(width=17, height=10) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("ctcf_motifs_10e-6_classification_%dclass.png", k)), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis - x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) - x.at = (x.lab + ncol(ref.open)) / 2 + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal - x.at = seq(0, 1, 0.5) - axis(2, at=x.at, labels=x.at) + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) } + # inlets with center row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(0.2))+0.05 top = bottom + 0.15 par(fig=c(left, right, bottom, top), new=T) - idx = 380:420 + idx = (391-1-20):(391+1+20) plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) # xaxis - x.at = 1:length(idx) - axis(1, at=x.at, labels=x.at) + x.at = seq(1, length(idx), 
length.out = 3) + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + axis(1, at=x.at, labels=x.lab) # yaxis - x.at = seq(0, 2, by=1) - axis(2, at=x.at, labels=x.at) + axis(2, at=y.at, labels=y.lab) row_n = row_n + 1 if(i %% 5 == 0) { col_n = col_n + 1 row_n = 1 } } dev.off() } diff --git a/scripts/10xgenomics_PBMC_5k_classification_2/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_2/classification_ctcf_motif.sh index 5cc89c7..341fe6a 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_2/classification_ctcf_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_2/classification_ctcf_motif.sh @@ -1,37 +1,36 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_2' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat" ## file with seeds file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=3 +n_core=12 # open chromatin and nucleosomes for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d' file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod3=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat' file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_open --seq $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - 
bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMJoint --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done diff --git a/scripts/10xgenomics_PBMC_5k_classification_2/classification_ebf1_motif.R b/scripts/10xgenomics_PBMC_5k_classification_2/classification_ebf1_motif.R index cb92556..e3efefd 100644 --- a/scripts/10xgenomics_PBMC_5k_classification_2/classification_ebf1_motif.R +++ b/scripts/10xgenomics_PBMC_5k_classification_2/classification_ebf1_motif.R @@ -1,96 +1,98 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) library(seqLogo) # functions source(file.path("scripts", "functions.R")) # the minimum number of classes searched k.min = 1 # the maximum number of classes searched k.max = 10 # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") ################## sequence patterns around ebf1 motifs ################## for(k in k.min:k.max) { # open chromatin data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("ebf1_motifs_10e-6_open_bin1bp_read_atac_%dclass_model.mat", k))) model.open = data$models model.prob = data$prob data = NULL # nucleosomes model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_%dclass_model.mat", 
k)))$models # sequence model.seq = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("ebf1_motifs_10e-6_sequences_%dclass_model.mat", k)))$models # plot classes col = brewer.pal(3, "Set1") # X11(width=17, height=10) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("ebf1_motifs_10e-6_classification_%dclass.png", k)), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis - x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) - x.at = (x.lab + ncol(ref.open)) / 2 + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal - x.at = seq(0, 1, 0.5) - axis(2, at=x.at, labels=x.at) + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) } + # inlets with center row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(0.2))+0.05 top = bottom + 0.15 par(fig=c(left, right, bottom, top), new=T) - idx = 380:420 + idx = (391-1-20):(391+1+20) plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / 
max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) # xaxis - x.at = 1:length(idx) - axis(1, at=x.at, labels=x.at) + x.at = seq(1, length(idx), length.out = 3) + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + axis(1, at=x.at, labels=x.lab) # yaxis - x.at = seq(0, 2, by=1) - axis(2, at=x.at, labels=x.at) + axis(2, at=y.at, labels=y.lab) row_n = row_n + 1 if(i %% 5 == 0) { col_n = col_n + 1 row_n = 1 } } dev.off() } diff --git a/scripts/10xgenomics_PBMC_5k_classification_2/classification_ebf1_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_2/classification_ebf1_motif.sh index d9fbe7d..ce5cdc0 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_2/classification_ebf1_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_2/classification_ebf1_motif.sh @@ -1,37 +1,36 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_2' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/ebf1_motifs_10e-6_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/ebf1_motifs_10e-6_sequences.mat" ## file with seeds file_seed=$results_dir'/ebf1_motifs_10e-6_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=3 +n_core=12 # open chromatin and nucleosomes for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d' file_mod1=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod3=$results_dir/'ebf1_motifs_10e-6_sequences_'$k'class_model.mat' file_aic=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> 
$file_seed - bin/ChIPPartitioning --read $file_mat_open --seq $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMJoint --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done diff --git a/scripts/10xgenomics_PBMC_5k_classification_2/classification_myc_motif.R b/scripts/10xgenomics_PBMC_5k_classification_2/classification_myc_motif.R index 902b5d9..c79b248 100644 --- a/scripts/10xgenomics_PBMC_5k_classification_2/classification_myc_motif.R +++ b/scripts/10xgenomics_PBMC_5k_classification_2/classification_myc_motif.R @@ -1,96 +1,98 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) library(seqLogo) # functions source(file.path("scripts", "functions.R")) # the minimum number of classes searched k.min = 1 # the maximum number of classes searched k.max = 10 # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") ################## sequence patterns around myc motifs ################## for(k in k.min:k.max) { # open chromatin data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("myc_motifs_10e-6_open_bin1bp_read_atac_%dclass_model.mat", k))) model.open = data$models model.prob = data$prob data = NULL # 
nucleosomes model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("myc_motifs_10e-6_1nucl_bin1bp_fragment_center_%dclass_model.mat", k)))$models # sequence model.seq = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("myc_motifs_10e-6_sequences_%dclass_model.mat", k)))$models # plot classes col = brewer.pal(3, "Set1") # X11(width=17, height=10) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("myc_motifs_10e-6_classification_%dclass.png", k)), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis - x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) - x.at = (x.lab + ncol(ref.open)) / 2 + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal - x.at = seq(0, 1, 0.5) - axis(2, at=x.at, labels=x.at) + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) } + # inlets with center row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(0.2))+0.05 top = bottom + 0.15 par(fig=c(left, right, bottom, top), new=T) - idx = 380:420 + idx = 
(391-1-20):(391+1+20) plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) # xaxis - x.at = 1:length(idx) - axis(1, at=x.at, labels=x.at) + x.at = seq(1, length(idx), length.out = 3) + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + axis(1, at=x.at, labels=x.lab) # yaxis - x.at = seq(0, 2, by=1) - axis(2, at=x.at, labels=x.at) + axis(2, at=y.at, labels=y.lab) row_n = row_n + 1 if(i %% 5 == 0) { col_n = col_n + 1 - row_n = 1 + row_n = 1 } } dev.off() } diff --git a/scripts/10xgenomics_PBMC_5k_classification_2/classification_myc_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_2/classification_myc_motif.sh index 0b1d83a..231485b 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_2/classification_myc_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_2/classification_myc_motif.sh @@ -1,37 +1,36 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_2' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/myc_motifs_10e-6_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/myc_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/myc_motifs_10e-6_sequences.mat" ## file with seeds file_seed=$results_dir'/myc_motifs_10e-6_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=3 +n_core=12 # open chromatin and nucleosomes for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d' file_mod1=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat' 
file_mod3=$results_dir/'myc_motifs_10e-6_sequences_'$k'class_model.mat' file_aic=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_open --seq $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + bin/EMJoint --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done diff --git a/scripts/10xgenomics_PBMC_5k_classification_2/classification_sp1_motif.R b/scripts/10xgenomics_PBMC_5k_classification_2/classification_sp1_motif.R index f53e34b..24d95e0 100644 --- a/scripts/10xgenomics_PBMC_5k_classification_2/classification_sp1_motif.R +++ b/scripts/10xgenomics_PBMC_5k_classification_2/classification_sp1_motif.R @@ -1,96 +1,98 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) library(seqLogo) # functions source(file.path("scripts", "functions.R")) # the minimum number of classes searched k.min = 1 # the maximum number of classes searched k.max = 10 # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") ################## sequence patterns around sp1 motifs ################## for(k in k.min:k.max) { # open chromatin data = 
read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_model.mat", k))) model.open = data$models model.prob = data$prob data = NULL # nucleosomes model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_%dclass_model.mat", k)))$models # sequence model.seq = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("sp1_motifs_10e-7_sequences_%dclass_model.mat", k)))$models # plot classes col = brewer.pal(3, "Set1") # X11(width=17, height=10) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_2", sprintf("sp1_motifs_10e-7_classification_%dclass.png", k)), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis - x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) - x.at = (x.lab + ncol(ref.open)) / 2 + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal - x.at = seq(0, 1, 0.5) - axis(2, at=x.at, labels=x.at) + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) } + # inlets with center row_n = 1 # row counter col_n = 1 # column counter for(i in 
1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(0.2))+0.05 top = bottom + 0.15 par(fig=c(left, right, bottom, top), new=T) - idx = 380:420 + idx = (391-1-20):(391+1+20) plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) # xaxis - x.at = 1:length(idx) - axis(1, at=x.at, labels=x.at) + x.at = seq(1, length(idx), length.out = 3) + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + axis(1, at=x.at, labels=x.lab) # yaxis - x.at = seq(0, 2, by=1) - axis(2, at=x.at, labels=x.at) + axis(2, at=y.at, labels=y.lab) row_n = row_n + 1 if(i %% 5 == 0) { col_n = col_n + 1 row_n = 1 } } dev.off() } diff --git a/scripts/10xgenomics_PBMC_5k_classification_2/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_2/classification_sp1_motif.sh index 2a18d68..7ba3cf0 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_2/classification_sp1_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_2/classification_sp1_motif.sh @@ -1,37 +1,35 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_2' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/sp1_motifs_10e-7_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/sp1_motifs_10e-7_sequences.mat" ## file with seeds file_seed=$results_dir'/sp1_motifs_10e-7_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=3 +n_core=12 # open chromatin and nucleosomes for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) 
file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d' file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod3=$results_dir/'sp1_motifs_10e-7_sequences_'$k'class_model.mat' file_aic=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_open --seq $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 - + bin/EMJoint --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 done diff --git a/scripts/10xgenomics_PBMC_5k_classification_3/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_3/classification_ctcf_motif.sh index f47c2de..452d48c 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_3/classification_ctcf_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_3/classification_ctcf_motif.sh @@ -1,39 +1,38 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_3' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat" 
file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" file_mat_nucl="$data_dir/ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat" ## file with seeds file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=8 +n_core=12 -# open chromatin and nucleosomes +# sequences for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_sequences_'$k'class_prob.mat4d' file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod3=$results_dir/'ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat' file_mod4=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat' file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - # bin/ChIPPartitioning --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - # bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - # bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3 - # bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread 
$n_core 1> $file_mod3 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 done diff --git a/scripts/10xgenomics_PBMC_5k_classification_3/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_3/classification_sp1_motif.sh index 0779c14..a37b533 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_3/classification_sp1_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_3/classification_sp1_motif.sh @@ -1,39 +1,38 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_3' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/sp1_motifs_10e-7_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat" file_mat_nucl="$data_dir/sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/sp1_motifs_10e-7_sequences.mat" ## file with seeds file_seed=$results_dir'/sp1_motifs_10e-7_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='21' -seeding='random' -n_core=8 +n_core=12 -# open chromatin and nucleosomes +# sequences for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_sequences_'$k'class_prob.mat4d' file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod3=$results_dir/'sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat' file_mod4=$results_dir/'sp1_motifs_10e-7_sequences_'$k'class_model.mat' file_aic=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob 
--thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 done diff --git a/scripts/10xgenomics_PBMC_5k_classification_4/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_4/classification_ctcf_motif.sh index 621241a..e91d7fb 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_4/classification_ctcf_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_4/classification_ctcf_motif.sh @@ -1,39 +1,38 @@ # some paths ## directories results_dir='results/10xgenomics_PBMC_5k_classification_4' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" file_mat_nucl="$data_dir/ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat" ## file with seeds file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' n_shift='1' -seeding='random' -n_core=8 +n_core=12 -# open chromatin and nucleosomes +# sequences for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_sequences_'$k'class_prob.mat4d' 
file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod3=$results_dir/'ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat' file_mod4=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat' file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 done diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks.R b/scripts/10xgenomics_PBMC_5k_classification_4/classification_sp1_motif.R similarity index 84% copy from scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks.R copy to scripts/10xgenomics_PBMC_5k_classification_4/classification_sp1_motif.R index 93d8eae..3dc0ab1 100644 --- a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks.R +++ 
b/scripts/10xgenomics_PBMC_5k_classification_4/classification_sp1_motif.R @@ -1,96 +1,96 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) library(seqLogo) # functions source(file.path("scripts", "functions.R")) # the minimum number of classes searched k.min = 1 # the maximum number of classes searched k.max = 10 # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") -################## sequence patterns around ctcf motifs ################## +################## sequence patterns around sp1 motifs ################## for(k in k.min:k.max) { # open chromatin data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_4", - sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_model.mat", k))) + sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_model.mat", k))) model.open = data$models model.prob = data$prob data = NULL # nucleosomes model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_4", - sprintf("ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center_%dclass_model.mat", k)))$models + sprintf("sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_%dclass_model.mat", k)))$models # sequence model.seq = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_classification_4", - sprintf("ctcf_motifs_10e-6_sequences_%dclass_model.mat", k)))$models + sprintf("sp1_motifs_10e-7_sequences_%dclass_model.mat", k)))$models # plot classes col = brewer.pal(3, "Set1") # X11(width=17, height=10) png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_4", - sprintf("ctcf_motifs_10e-6_classification_sequences_%dclass.png", k)), + sprintf("sp1_motifs_10e-7_classification_sequences_%dclass.png", k)), units="in", res=720, width=18, height=12) m = matrix(1:10, nrow=5, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, 
decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) x.at = (x.lab + ncol(ref.open)) / 2 axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal x.at = seq(0, 1, 0.5) axis(2, at=x.at, labels=x.at) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) } row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(0.2))+0.05 top = bottom + 0.15 par(fig=c(left, right, bottom, top), new=T) idx = 380:420 plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) # xaxis x.at = 1:length(idx) axis(1, at=x.at, labels=x.at) # yaxis x.at = seq(0, 2, by=1) axis(2, at=x.at, labels=x.at) row_n = row_n + 1 if(i %% 5 == 0) { col_n = col_n + 1 row_n = 1 } } dev.off() } diff --git a/scripts/10xgenomics_PBMC_5k_classification_3/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_classification_4/classification_sp1_motif.sh similarity index 63% copy from scripts/10xgenomics_PBMC_5k_classification_3/classification_sp1_motif.sh copy to scripts/10xgenomics_PBMC_5k_classification_4/classification_sp1_motif.sh index 0779c14..5381f5c 100755 --- a/scripts/10xgenomics_PBMC_5k_classification_3/classification_sp1_motif.sh +++ b/scripts/10xgenomics_PBMC_5k_classification_4/classification_sp1_motif.sh @@ -1,39 
+1,38 @@ # some paths ## directories -results_dir='results/10xgenomics_PBMC_5k_classification_3' +results_dir='results/10xgenomics_PBMC_5k_classification_4' data_dir='results/10xgenomics_PBMC_5k' ## input file_mat_open="$data_dir/sp1_motifs_10e-7_open_bin1bp_read_atac.mat" file_mat_1nucl="$data_dir/sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat" file_mat_nucl="$data_dir/sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center.mat" file_mat_seq="$data_dir/sp1_motifs_10e-7_sequences.mat" ## file with seeds file_seed=$results_dir'/sp1_motifs_10e-7_seed.txt' mkdir -p $results_dir touch $file_seed # parameters n_iter='20' -n_shift='21' -seeding='random' -n_core=8 +n_shift='1' +n_core=12 -# open chromatin and nucleosomes +# sequences for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_sequences_'$k'class_prob.mat4d' file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_model.mat' file_mod2=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_model.mat' file_mod3=$results_dir/'sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat' file_mod4=$results_dir/'sp1_motifs_10e-7_sequences_'$k'class_model.mat' file_aic=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 - bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3 - bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed 
--thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 done diff --git a/scripts/10xgenomics_PBMC_5k_peaks/analysis_peaks.sh b/scripts/10xgenomics_PBMC_5k_peaks/analysis_peaks.sh new file mode 100755 index 0000000..8869f9e --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks/analysis_peaks.sh @@ -0,0 +1,35 @@ +# some paths +## directories +data_dir='data/10xgenomics_PBMC_5k' +seq_dir='data/genomes' +results_dir='data/10xgenomics_PBMC_5k_peaks' +## input +file_bed_rmsk=$data_dir/'atac_v1_pbmc_5k_peaks_rmsk.bed' +file_bam_open="$data_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam" +file_bai_open="$data_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai" +file_bam_nucl="$data_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam" +file_bai_nucl="$data_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai" +file_seq="$seq_dir/hg19.fasta" + +mkdir -p $results_dir + +# matrix creation +## 1kb sequences +file_mat_seq_1kb="$results_dir/peaks_rmsk_sequences_1kb.mat" +bin/SequenceMatrixCreator --bed $file_bed_rmsk --fasta $file_seq --from -500 --to 500 > $file_mat_seq_1kb + +## open chromatin around peaks +for method in 'read_atac' +do + file_mat_open_1kb="$results_dir/peaks_rmsk_openchromatin_1kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk --bam $file_bam_open --bai $file_bai_open --from -500 --to 500 --binSize 1 --method $method > $file_mat_open_1kb +done + +## all nucleosomes around peaks +for method in 'fragment_center' +do + file_mat_nucl_1kb="$results_dir/peaks_rmsk_nucleosomes_1kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk --bam $file_bam_nucl --bai $file_bai_nucl --from -500 --to 500 
--binSize 1 --method $method > $file_mat_nucl_1kb +done + + diff --git a/scripts/10xgenomics_PBMC_5k_peaks/analysis_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks/analysis_peaks_sampled.sh new file mode 100755 index 0000000..1712862 --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks/analysis_peaks_sampled.sh @@ -0,0 +1,51 @@ +# some paths +## directories +data_dir='data/10xgenomics_PBMC_5k' +seq_dir='data/genomes' +results_dir='data/10xgenomics_PBMC_5k_peaks' +## input +file_bed=$data_dir'/atac_v1_pbmc_5k_peaks.bed' +file_bam_open="$data_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam" +file_bai_open="$data_dir/atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai" +file_bam_nucl="$data_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam" +file_bai_nucl="$data_dir/atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai" +file_seq="$seq_dir/hg19.fasta" +file_rmsk="$seq_dir/hg19_rmsk.bed" + +mkdir -p $results_dir + +# filter out peaks with >=30% repeated region inside +file_bed_rmsk=$data_dir/'atac_v1_pbmc_5k_peaks_rmsk.bed' +bin/bedtools/bedtools subtract -A -f 0.3 -a $file_bed -b $file_rmsk > $file_bed_rmsk + +# sampled from the repeat-filtered bed +file_bed_rmsk_sample=$data_dir'/atac_v1_pbmc_5k_peaks_rmsk_sampled.bed' +shuf $file_bed_rmsk | head -n 10000 > $file_bed_rmsk_sample + +# matrix creation +## 1kb sequences +file_mat_seq_1kb="$results_dir/peaks_rmsk_sampled_sequences_1kb.mat" +bin/SequenceMatrixCreator --bed $file_bed_rmsk_sample --fasta $file_seq --from -500 --to 500 > $file_mat_seq_1kb +## 2kb sequences +file_mat_seq_2kb="$results_dir/peaks_rmsk_sampled_sequences_2kb.mat" +bin/SequenceMatrixCreator --bed $file_bed_rmsk_sample --fasta $file_seq --from -1000 --to 1000 > $file_mat_seq_2kb + +## open chromatin around peaks +for method in 'read_atac' +do + file_mat_open_1kb="$results_dir/peaks_rmsk_sampled_openchromatin_1kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk_sample --bam $file_bam_open --bai $file_bai_open --from -500 --to 500 --binSize 1 
--method $method > $file_mat_open_1kb + file_mat_open_2kb="$results_dir/peaks_rmsk_sampled_openchromatin_2kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk_sample --bam $file_bam_open --bai $file_bai_open --from -1000 --to 1000 --binSize 1 --method $method > $file_mat_open_2kb +done + +## all nucleosomes around peaks +for method in 'fragment_center' +do + file_mat_nucl_1kb="$results_dir/peaks_rmsk_sampled_nucleosomes_1kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk_sample --bam $file_bam_nucl --bai $file_bai_nucl --from -500 --to 500 --binSize 1 --method $method > $file_mat_nucl_1kb + file_mat_nucl_2kb="$results_dir/peaks_rmsk_sampled_nucleosomes_2kb_$method.mat" + bin/CorrelationMatrixCreator --bed $file_bed_rmsk_sample --bam $file_bam_nucl --bai $file_bai_nucl --from -1000 --to 1000 --binSize 1 --method $method > $file_mat_nucl_2kb +done + + diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/analysis_test_sampled.R b/scripts/10xgenomics_PBMC_5k_peaks_classification_1/analysis_test_sampled.R new file mode 100644 index 0000000..4e371a7 --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_1/analysis_test_sampled.R @@ -0,0 +1,96 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) + +# functions +source(file.path("scripts", "functions.R")) + +# the number of classes searched +n.classes = c(10, 20, 30) + +# path to the images for the logo +path.a = file.path("res/A.png") +path.c = file.path("res/C.png") +path.g = file.path("res/G.png") +path.t = file.path("res/T.png") + +################## sequence patterns around ctcf motifs ################## + +for(k in n.classes) +{ + # sequence + data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_1", + sprintf("peaks_rmsk_sampled_sequences_1kb_%dclass_model.mat", k))) + model.seq = data$models + model.prob = data$prob + data = NULL + + # open chromatin + model.open = 
read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_1", + sprintf("peaks_rmsk_sampled_openchromatin_1kb_read_atac_%dclass_model.mat", k)))$models + # nucleosomes + model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_1", + sprintf("peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_%dclass_model.mat", k)))$models + + # plot classes + col = brewer.pal(3, "Set1") + # X11(width=26, height=12) + png(filename=file.path("results", "10xgenomics_PBMC_5k_peaks_classification_1", + sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), + units="in", res=720, width=18, height=12) + m = matrix(1:30, nrow=6, ncol=5, byrow=F) + layout(m) + # order from most to least probable class + ord = order(model.prob, decreasing=T) + ref.open = model.open[ord,, drop=F] + ref.nucl = model.nucl[ord,, drop=F] + ref.seq = model.seq[,,ord, drop=F] + prob = model.prob[ord] + class = c(1:nrow(ref.open))[ord] + for(i in 1:nrow(ref.open)) + { # plot logo + plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, + main=sprintf("class %d (p=%.2f)", class[i], prob[i])) + # x-axis + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) + axis(1, at=x.at, labels=x.lab) + # y-axis is [0,1] for min/max signal + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) + } + # inlets with center + # row_n = 1 # row counter + # col_n = 1 # column counter + # for(i in 1:nrow(ref.open)) + # { # plot logo center + # right = 0.5*col_n - 0.01 + # left = right - 0.2 + # bottom = 1-(row_n*(0.2))+0.05 + # top = bottom + 0.15 + # par(fig=c(left, right, bottom, top), new=T) + # idx = (391-1-20):(391+1+20) + # plot.logo(ref.seq[,idx,i], path.a, 
path.c, path.g, path.t) + # # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + # lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) + # lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) + # # xaxis + # x.at = seq(1, length(idx), length.out = 3) + # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + # axis(1, at=x.at, labels=x.lab) + # # yaxis + # axis(2, at=y.at, labels=y.lab) + # row_n = row_n + 1 + # if(i %% 5 == 0) + # { col_n = col_n + 1 + # row_n = 1 + # } + # } + dev.off() +} + diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks.sh deleted file mode 100755 index 0ea6153..0000000 --- a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks.sh +++ /dev/null @@ -1,50 +0,0 @@ -# some paths -## directories -results_dir='results/10xgenomics_PBMC_5k_peaks_classification_1' -data_dir='results/10xgenomics_PBMC_5k' -## input -file_mat_open="$data_dir/peaks_open_bin1bp_read_atac.mat" -file_mat_nucl="$data_dir/peaks_nucleosomes_bin1bp_read_atac.mat" -file_mat_seq="$data_dir/peaks_sequences.mat" - -## file with seeds -file_seed=$results_dir'/peaks_seed.txt' - -mkdir -p $results_dir -touch $file_seed - -# parameters -n_iter='20' -n_shift='201' -seeding='random' -n_core=8 - -# open chromatin and nucleosomes -# for k in 10 20 30 -# do -# seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) -# file_prob=$results_dir/'peaks_sequences_'$k'class_prob.mat4d' -# file_mod1=$results_dir/'peaks_openchromatin_bin1bp_'$k'class_model.mat' -# file_mod2=$results_dir/'peaks_nucleosomes_bin1bp_'$k'class_model.mat' -# file_mod3=$results_dir/'peaks_sequences_'$k'class_model.mat' -# echo "$file_prob $seed" >> $file_seed -# bin/ChIPPartitioning --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob -# 
bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 -# bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 -# bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3 -# bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 -# done - -k=5 -seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) -file_prob=$results_dir/'peaks_sequences_'$k'class_prob.mat4d' -file_mod1=$results_dir/'peaks_openchromatin_bin1bp_'$k'class_model.mat' -file_mod2=$results_dir/'peaks_nucleosomes_bin1bp_'$k'class_model.mat' -file_mod3=$results_dir/'peaks_sequences_'$k'class_model.mat' -echo "$file_prob $seed" >> $file_seed -bin/ChIPPartitioning --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob -bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 -bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2 -bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3 -bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4 - diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks_sampled.sh new file mode 100755 index 0000000..618a604 --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks_sampled.sh @@ -0,0 +1,35 @@ + +# paths +## dir +data_dir="data/10xgenomics_PBMC_5k_peaks" +results_dir="results/10xgenomics_PBMC_5k_peaks_classification_1" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat' +## file with seeds 
+file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='981' +n_core=24 + +# classify +for k in 10 20 30 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --bgclass --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_2/analysis_test_sampled.R b/scripts/10xgenomics_PBMC_5k_peaks_classification_2/analysis_test_sampled.R new file mode 100644 index 0000000..fba2a15 --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_2/analysis_test_sampled.R @@ -0,0 +1,96 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) + +# functions +source(file.path("scripts", "functions.R")) + +# the number of classes searched +n.classes = c(10, 20, 30) + +# path to the images for the logo +path.a = file.path("res/A.png") +path.c = file.path("res/C.png") +path.g = file.path("res/G.png") +path.t = file.path("res/T.png") + +################## sequence patterns around ctcf motifs ################## + +for(k in n.classes) +{ + # sequence + data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_2", 
+ sprintf("peaks_rmsk_sampled_sequences_1kb_%dclass_model.mat", k))) + model.seq = data$models + model.prob = data$prob + data = NULL + + # open chromatin + model.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_2", + sprintf("peaks_rmsk_sampled_openchromatin_1kb_read_atac_%dclass_model.mat", k)))$models + # nucleosomes + model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_2", + sprintf("peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_%dclass_model.mat", k)))$models + + # plot classes + col = brewer.pal(3, "Set1") + # X11(width=26, height=12) + png(filename=file.path("results", "10xgenomics_PBMC_5k_peaks_classification_2", + sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), + units="in", res=720, width=18, height=12) + m = matrix(1:30, nrow=6, ncol=5, byrow=F) + layout(m) + # order from most to least probable class + ord = order(model.prob, decreasing=T) + ref.open = model.open[ord,, drop=F] + ref.nucl = model.nucl[ord,, drop=F] + ref.seq = model.seq[,,ord, drop=F] + prob = model.prob[ord] + class = c(1:nrow(ref.open))[ord] + for(i in 1:nrow(ref.open)) + { # plot logo + plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, + main=sprintf("class %d (p=%.2f)", class[i], prob[i])) + # x-axis + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) + axis(1, at=x.at, labels=x.lab) + # y-axis is [0,1] for min/max signal + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) + } + # inlets with center + # row_n = 1 # row counter + # col_n = 1 # column counter + # for(i in 1:nrow(ref.open)) + # { # plot logo center + # right = 0.5*col_n - 0.01 + # left = right - 0.2 + # bottom = 
1-(row_n*(0.2))+0.05 + # top = bottom + 0.15 + # par(fig=c(left, right, bottom, top), new=T) + # idx = (391-1-20):(391+1+20) + # plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) + # # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + # lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) + # lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) + # # xaxis + # x.at = seq(1, length(idx), length.out = 3) + # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + # axis(1, at=x.at, labels=x.lab) + # # yaxis + # axis(2, at=y.at, labels=y.lab) + # row_n = row_n + 1 + # if(i %% 5 == 0) + # { col_n = col_n + 1 + # row_n = 1 + # } + # } + dev.off() +} + diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_2/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_2/classification_peaks_sampled.sh new file mode 100755 index 0000000..03c84de --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_2/classification_peaks_sampled.sh @@ -0,0 +1,35 @@ + +# paths +## dir +data_dir="data/10xgenomics_PBMC_5k_peaks" +results_dir="results/10xgenomics_PBMC_5k_peaks_classification_2" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='981' +n_core=24 + +# classify +for k in 10 20 30 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat' + 
file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_3/analysis_test_sampled.R b/scripts/10xgenomics_PBMC_5k_peaks_classification_3/analysis_test_sampled.R new file mode 100644 index 0000000..d0bf0d6 --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_3/analysis_test_sampled.R @@ -0,0 +1,96 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) + +# functions +source(file.path("scripts", "functions.R")) + +# the number of classes searched +n.classes = c(10, 20, 30) + +# path to the images for the logo +path.a = file.path("res/A.png") +path.c = file.path("res/C.png") +path.g = file.path("res/G.png") +path.t = file.path("res/T.png") + +################## sequence patterns around ctcf motifs ################## + +for(k in n.classes) +{ + # sequence + data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_3", + sprintf("peaks_rmsk_sampled_sequences_1kb_%dclass_model.mat", k))) + model.seq = data$models + model.prob = data$prob + data = NULL + + # open chromatin + model.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_3", + sprintf("peaks_rmsk_sampled_openchromatin_1kb_read_atac_%dclass_model.mat", k)))$models + # nucleosomes + model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_3", + 
sprintf("peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_%dclass_model.mat", k)))$models + + # plot classes + col = brewer.pal(3, "Set1") + # X11(width=26, height=12) + png(filename=file.path("results", "10xgenomics_PBMC_5k_peaks_classification_3", + sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), + units="in", res=720, width=18, height=12) + m = matrix(1:30, nrow=6, ncol=5, byrow=F) + layout(m) + # order from most to least probable class + ord = order(model.prob, decreasing=T) + ref.open = model.open[ord,, drop=F] + ref.nucl = model.nucl[ord,, drop=F] + ref.seq = model.seq[,,ord, drop=F] + prob = model.prob[ord] + class = c(1:nrow(ref.open))[ord] + for(i in 1:nrow(ref.open)) + { # plot logo + plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, + main=sprintf("class %d (p=%.2f)", class[i], prob[i])) + # x-axis + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) + axis(1, at=x.at, labels=x.lab) + # y-axis is [0,1] for min/max signal + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) + } + # inlets with center + # row_n = 1 # row counter + # col_n = 1 # column counter + # for(i in 1:nrow(ref.open)) + # { # plot logo center + # right = 0.5*col_n - 0.01 + # left = right - 0.2 + # bottom = 1-(row_n*(0.2))+0.05 + # top = bottom + 0.15 + # par(fig=c(left, right, bottom, top), new=T) + # idx = (391-1-20):(391+1+20) + # plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) + # # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + # lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) + # lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) + # # xaxis + # x.at = seq(1, length(idx), length.out = 3) + # 
x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + # axis(1, at=x.at, labels=x.lab) + # # yaxis + # axis(2, at=y.at, labels=y.lab) + # row_n = row_n + 1 + # if(i %% 5 == 0) + # { col_n = col_n + 1 + # row_n = 1 + # } + # } + dev.off() +} + diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_3/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_3/classification_peaks_sampled.sh new file mode 100755 index 0000000..a396e60 --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_3/classification_peaks_sampled.sh @@ -0,0 +1,35 @@ + +# paths +## dir +data_dir="data/10xgenomics_PBMC_5k_peaks" +results_dir="results/10xgenomics_PBMC_5k_peaks_classification_3" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='981' +n_core=24 + +# classify +for k in 10 20 30 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_openchromatin-sequences_1kb_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMJoint --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> 
$file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_4/analysis_test_sampled.R b/scripts/10xgenomics_PBMC_5k_peaks_classification_4/analysis_test_sampled.R new file mode 100644 index 0000000..df6959c --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_4/analysis_test_sampled.R @@ -0,0 +1,96 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) + +# functions +source(file.path("scripts", "functions.R")) + +# the number of classes searched +n.classes = c(17, 20, 30) + +# path to the images for the logo +path.a = file.path("res/A.png") +path.c = file.path("res/C.png") +path.g = file.path("res/G.png") +path.t = file.path("res/T.png") + +################## sequence patterns around ctcf motifs ################## + +for(k in n.classes) +{ + # sequence + data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_4", + sprintf("peaks_rmsk_sampled_sequences_1kb_%dclass_model.mat", k))) + model.seq = data$models + model.prob = data$prob + data = NULL + + # open chromatin + model.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_4", + sprintf("peaks_rmsk_sampled_openchromatin_1kb_read_atac_%dclass_model.mat", k)))$models + # nucleosomes + model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_4", + sprintf("peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_%dclass_model.mat", k)))$models + + # plot classes + col = brewer.pal(3, "Set1") + # X11(width=26, height=12) + png(filename=file.path("results", "10xgenomics_PBMC_5k_peaks_classification_4", + sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), + units="in", res=720, width=18, height=12) + m = matrix(1:30, nrow=6, ncol=5, byrow=F) + layout(m) + # order from most to least probable class + ord = order(model.prob, decreasing=T) + ref.open = 
model.open[ord,, drop=F] + ref.nucl = model.nucl[ord,, drop=F] + ref.seq = model.seq[,,ord, drop=F] + prob = model.prob[ord] + class = c(1:nrow(ref.open))[ord] + for(i in 1:nrow(ref.open)) + { # plot logo + plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, + main=sprintf("class %d (p=%.2f)", class[i], prob[i])) + # x-axis + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) + axis(1, at=x.at, labels=x.lab) + # y-axis is [0,1] for min/max signal + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) + } + # inlets with center + # row_n = 1 # row counter + # col_n = 1 # column counter + # for(i in 1:nrow(ref.open)) + # { # plot logo center + # right = 0.5*col_n - 0.01 + # left = right - 0.2 + # bottom = 1-(row_n*(0.2))+0.05 + # top = bottom + 0.15 + # par(fig=c(left, right, bottom, top), new=T) + # idx = (391-1-20):(391+1+20) + # plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) + # # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + # lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) + # lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) + # # xaxis + # x.at = seq(1, length(idx), length.out = 3) + # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + # axis(1, at=x.at, labels=x.lab) + # # yaxis + # axis(2, at=y.at, labels=y.lab) + # row_n = row_n + 1 + # if(i %% 5 == 0) + # { col_n = col_n + 1 + # row_n = 1 + # } + # } + dev.off() +} + diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_4/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_4/classification_peaks_sampled.sh new file mode 100755 index 0000000..d87ff4e --- /dev/null +++ 
b/scripts/10xgenomics_PBMC_5k_peaks_classification_4/classification_peaks_sampled.sh @@ -0,0 +1,55 @@ + +# paths +## dir +data_dir="data/10xgenomics_PBMC_5k_peaks" +pwm_dir="data/pwm/jaspar_2018_clustering/" +results_dir="results/10xgenomics_PBMC_5k_peaks_classification_4" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='971' +n_core=30 +## PWM files +jun="$pwm_dir/cluster_3_node_23_20_motifs_prob.mat" +hif1a="$pwm_dir/cluster_4_node_31_3_motifs_prob.mat" +myc="$pwm_dir/cluster_4_node_22_4_motifs_prob.mat" +pu1="$pwm_dir/cluster_7_node_13_2_motifs_prob.mat" +cebpb="$pwm_dir/cluster_5_node_20_5_motifs_prob.mat" +irf4="$pwm_dir/cluster_31_node_4_5_motifs_prob.mat" +irf2="$pwm_dir/cluster_31_node_5_2_motifs_prob.mat" +lhx3="$pwm_dir/cluster_1_node_74_2_motifs_prob.mat" +foxh1="$pwm_dir/cluster_66_1_motifs_prob.mat" +sox3="$pwm_dir/cluster_33_node_1_2_motifs_prob.mat" +mef2c="$pwm_dir/cluster_20_4_motifs_prob.mat" +elf5="$pwm_dir/cluster_7_node_17_5_motifs_prob.mat" +stat6="$pwm_dir/cluster_32_node_STAT6_1_motifs_prob.mat" +nfe2="$pwm_dir/cluster_3_node_24_4_motifs_prob.mat" +ahr="$pwm_dir/cluster_4_node_30_2_motifs_prob.mat" +e2f2="$pwm_dir/cluster_39_node_1_2_motifs_prob.mat" +ctcf="$pwm_dir/cluster_48_node_ctcf_1_motifs_prob.mat" + + +# classify +for k in 30 20 17 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat' + 
file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --motifs $jun,$hif1a,$myc,$pu1,$cebpb,$irf4,$irf2,$lhx3,$foxh1,$sox3,$mef2c,$elf5,$stat6,$nfe2,$ahr,$e2f2,$ctcf --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_5/analysis_test_sampled.R b/scripts/10xgenomics_PBMC_5k_peaks_classification_5/analysis_test_sampled.R new file mode 100644 index 0000000..41ffc20 --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_5/analysis_test_sampled.R @@ -0,0 +1,96 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) + +# functions +source(file.path("scripts", "functions.R")) + +# the number of classes searched +n.classes = c(20, 30, 40) + +# path to the images for the logo +path.a = file.path("res/A.png") +path.c = file.path("res/C.png") +path.g = file.path("res/G.png") +path.t = file.path("res/T.png") + +################## sequence patterns around ctcf motifs ################## + +for(k in n.classes) +{ + # sequence + data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_5", + sprintf("peaks_rmsk_sampled_sequences_1kb_%dclass_model.mat", k))) + model.seq = data$models + model.prob = data$prob + data = NULL + + # open chromatin + model.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_5", + sprintf("peaks_rmsk_sampled_openchromatin_1kb_read_atac_%dclass_model.mat", k)))$models + # nucleosomes + model.nucl = 
read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_5", + sprintf("peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_%dclass_model.mat", k)))$models + + # plot classes + col = brewer.pal(3, "Set1") + # X11(width=26, height=12) + png(filename=file.path("results", "10xgenomics_PBMC_5k_peaks_classification_5", + sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), + units="in", res=720, width=18, height=12) + m = matrix(1:42, nrow=6, ncol=7, byrow=F) + layout(m) + # order from most to least probable class + ord = order(model.prob, decreasing=T) + ref.open = model.open[ord,, drop=F] + ref.nucl = model.nucl[ord,, drop=F] + ref.seq = model.seq[,,ord, drop=F] + prob = model.prob[ord] + class = c(1:nrow(ref.open))[ord] + for(i in 1:nrow(ref.open)) + { # plot logo + plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, + main=sprintf("class %d (p=%.2f)", class[i], prob[i])) + # x-axis + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) + axis(1, at=x.at, labels=x.lab) + # y-axis is [0,1] for min/max signal + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) + } + # inlets with center + # row_n = 1 # row counter + # col_n = 1 # column counter + # for(i in 1:nrow(ref.open)) + # { # plot logo center + # right = 0.5*col_n - 0.01 + # left = right - 0.2 + # bottom = 1-(row_n*(0.2))+0.05 + # top = bottom + 0.15 + # par(fig=c(left, right, bottom, top), new=T) + # idx = (391-1-20):(391+1+20) + # plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) + # # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + # lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) + # lines(2*(ref.nucl[i,idx] / 
max(ref.nucl[i,])), lwd=1, col=col[2]) + # # xaxis + # x.at = seq(1, length(idx), length.out = 3) + # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + # axis(1, at=x.at, labels=x.lab) + # # yaxis + # axis(2, at=y.at, labels=y.lab) + # row_n = row_n + 1 + # if(i %% 5 == 0) + # { col_n = col_n + 1 + # row_n = 1 + # } + # } + dev.off() +} + diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_5/classification_peaks_sampled.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_5/classification_peaks_sampled.sh new file mode 100755 index 0000000..5f54d1d --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_5/classification_peaks_sampled.sh @@ -0,0 +1,35 @@ + +# paths +## dir +data_dir="data/10xgenomics_PBMC_5k_peaks" +results_dir="results/10xgenomics_PBMC_5k_peaks_classification_5" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='991' +n_core=24 + +# classify +for k in 20 30 40 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel 
--read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_6/analysis_test_sampled.R b/scripts/10xgenomics_PBMC_5k_peaks_classification_6/analysis_test_sampled.R new file mode 100644 index 0000000..d4fc044 --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_6/analysis_test_sampled.R @@ -0,0 +1,95 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) + +# functions +source(file.path("scripts", "functions.R")) + +# the number of classes searched +n.classes = c(23) + +# path to the images for the logo +path.a = file.path("res/A.png") +path.c = file.path("res/C.png") +path.g = file.path("res/G.png") +path.t = file.path("res/T.png") + +################## sequence patterns around ctcf motifs ################## + +for(k in n.classes) +{ + # sequence + data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_6", + sprintf("peaks_rmsk_sampled_sequences_1kb_%dclass_model_extended.mat", k))) + model.seq = data$models + model.prob = data$prob + data = NULL + + # open chromatin + model.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_6", + sprintf("peaks_rmsk_sampled_openchromatin_1kb_read_atac_%dclass_model_extended.mat", k)))$models + # nucleosomes + model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_6", + sprintf("peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_%dclass_model_extended.mat", k)))$models + + # plot classes + col = brewer.pal(3, "Set1") + X11(width=26, height=12) + # png(filename=file.path("results", "10xgenomics_PBMC_5k_peaks_classification_6", + # sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), + # units="in", res=720, width=18, height=12) + m = matrix(1:24, nrow=6, ncol=4, byrow=F) + layout(m) + # order from most 
to least probable class + ord = order(model.prob, decreasing=T) + ref.open = model.open[ord,, drop=F][,316:716] + ref.nucl = model.nucl[ord,, drop=F][,316:716] + ref.seq = model.seq[,,ord, drop=F][,316:716,] + prob = model.prob[ord] + class = c(1:nrow(ref.open))[ord] + for(i in 1:nrow(ref.open)) + { # plot logo + plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, + main=sprintf("class %d (p=%.2f)", class[i], prob[i])) + # x-axis + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) + axis(1, at=x.at, labels=x.lab) + # y-axis is [0,1] for min/max signal + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) + } + # inlets with center + # row_n = 1 # row counter + # col_n = 1 # column counter + # for(i in 1:nrow(ref.open)) + # { # plot logo center + # right = 0.25*col_n + 0.03 + # left = right - 0.15 + # bottom = 1-(row_n*(0.2))+0.05 + # top = bottom + 0.15 + # par(fig=c(left, right, bottom, top), new=T) + # idx = (516-1-10):(516+1+10) + # plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) + # # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + # lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) + # lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) + # # xaxis + # x.at = seq(1, length(idx), length.out = 3) + # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + # axis(1, at=x.at, labels=x.lab) + # # yaxis + # axis(2, at=y.at, labels=y.lab) + # row_n = row_n + 1 + # if(i %% 5 == 0) + # { col_n = col_n + 1 + # row_n = 1 + # } + # } + dev.off() +} diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_6/classification_peaks_sampled.sh 
b/scripts/10xgenomics_PBMC_5k_peaks_classification_6/classification_peaks_sampled.sh new file mode 100755 index 0000000..1efedd4 --- /dev/null +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_6/classification_peaks_sampled.sh @@ -0,0 +1,76 @@ + +# paths +## dir +data_dir_p="data/10xgenomics_PBMC_5k_peaks" +data_dir="data/10xgenomics_PBMC_5k" +pwm_dir="data/pwm/jaspar_2018_clustering/" +hg19_dir="data/genomes" +results_dir="results/10xgenomics_PBMC_5k_peaks_classification_6" +## matrix files +file_mat_open=$data_dir_p/'peaks_rmsk_sampled_openchromatin_1kb_read_atac.mat' +file_mat_nucl=$data_dir_p/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center.mat' +file_mat_seq=$data_dir_p/'peaks_rmsk_sampled_sequences_1kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='1' +n_shift='971' +n_core=8 +## PWM files +jun="$pwm_dir/cluster_3_node_23_20_motifs_prob.mat" +hif1a="$pwm_dir/cluster_4_node_31_3_motifs_prob.mat" +myc="$pwm_dir/cluster_4_node_22_4_motifs_prob.mat" +pu1="$pwm_dir/cluster_7_node_13_2_motifs_prob.mat" +cebpb="$pwm_dir/cluster_5_node_20_5_motifs_prob.mat" +irf4="$pwm_dir/cluster_31_node_4_5_motifs_prob.mat" +irf2="$pwm_dir/cluster_31_node_5_2_motifs_prob.mat" +lhx3="$pwm_dir/cluster_1_node_74_2_motifs_prob.mat" +foxh1="$pwm_dir/cluster_66_1_motifs_prob.mat" +sox3="$pwm_dir/cluster_33_node_1_2_motifs_prob.mat" +mef2c="$pwm_dir/cluster_20_4_motifs_prob.mat" +elf5="$pwm_dir/cluster_7_node_17_5_motifs_prob.mat" +stat6="$pwm_dir/cluster_32_node_STAT6_1_motifs_prob.mat" +nfe2="$pwm_dir/cluster_3_node_24_4_motifs_prob.mat" +ahr="$pwm_dir/cluster_4_node_30_2_motifs_prob.mat" +e2f2="$pwm_dir/cluster_39_node_1_2_motifs_prob.mat" +ctcf="$pwm_dir/cluster_48_node_ctcf_1_motifs_prob.mat" +klf="$pwm_dir/cluster_28_node_14_3_motifs_prob.mat" +nr4a1="$pwm_dir/cluster_2_node_12_4_motifs_prob.mat" +egr="$pwm_dir/cluster_28_node_13_4_motifs_prob.mat" 
+gata="$pwm_dir/cluster_21_node_5_6_motifs_prob.mat" +nfat="$pwm_dir/cluster_19_node_2_3_motifs_prob.mat" +runx="$pwm_dir/cluster_38_node_3_3_motifs_prob.mat" + +# classify +for k in 23 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --motifs $jun,$hif1a,$myc,$pu1,$cebpb,$irf4,$irf2,$lhx3,$foxh1,$sox3,$mef2c,$elf5,$stat6,$nfe2,$ahr,$e2f2,$ctcf,$klf,$nr4a1,$egr,$gata,$nfat,$runx --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + + # extend models + file_mod1_ext=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_read_atac_'$k'class_model_extended.mat' + file_mod2_ext=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_'$k'class_model_extended.mat' + file_mod3_ext=$results_dir/'peaks_rmsk_sampled_sequences_1kb_'$k'class_model_extended.mat' + file_bed=$data_dir/'atac_v1_pbmc_5k_peaks_rmsk_sampled.bed' + file_fasta=$hg19_dir/'hg19.fasta' + file_bam_open=$data_dir/'atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam' + file_bai_open=$data_dir/'atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai' + file_bam_nucl=$data_dir/'atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam' + file_bai_nucl=$data_dir/'atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai' + bin/ReadModelExtender --bed $file_bed 
--bam $file_bam_open --bai $file_bai_open --prob $file_prob --from -500 --to 500 --ext 1000 --binSize 1 --method 'read_atac' --thread $n_core > $file_mod1_ext + bin/ReadModelExtender --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --prob $file_prob --from -500 --to 500 --ext 1000 --binSize 1 --method 'fragment_center' --thread $n_core > $file_mod2_ext + bin/SequenceModelExtender --bed $file_bed --fasta $file_fasta --prob $file_prob --from -500 --to 500 --ext 1000 --thread $n_core > $file_mod3_ext +done diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks.R b/scripts/10xgenomics_PBMC_5k_peaks_classification_7/analysis_test.R similarity index 54% rename from scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks.R rename to scripts/10xgenomics_PBMC_5k_peaks_classification_7/analysis_test.R index 93d8eae..33dcd04 100644 --- a/scripts/10xgenomics_PBMC_5k_peaks_classification_1/classification_peaks.R +++ b/scripts/10xgenomics_PBMC_5k_peaks_classification_7/analysis_test.R @@ -1,96 +1,103 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) -library(seqLogo) # functions source(file.path("scripts", "functions.R")) -# the minimum number of classes searched -k.min = 1 -# the maximum number of classes searched -k.max = 10 +# the number of classes searched +n.classes = c(23) # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") ################## sequence patterns around ctcf motifs ################## -for(k in k.min:k.max) -{ - # open chromatin - data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_4", - sprintf("ctcf_motifs_10e-6_open_bin1bp_read_atac_%dclass_model.mat", k))) - model.open = data$models +for(k in n.classes) +{ + # sequence + data = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_6", + 
sprintf("peaks_rmsk_sampled_sequences_1kb_%dclass_model_extended.mat", k))) + model.seq = data$models model.prob = data$prob data = NULL + + # open chromatin + model.open = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_6", + sprintf("peaks_rmsk_sampled_openchromatin_1kb_read_atac_%dclass_model_extended.mat", k)))$models # nucleosomes - model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_classification_4", - sprintf("ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center_%dclass_model.mat", k)))$models - # sequence - model.seq = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_classification_4", - sprintf("ctcf_motifs_10e-6_sequences_%dclass_model.mat", k)))$models + model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_peaks_classification_6", + sprintf("peaks_rmsk_sampled_nucleosomes_1kb_fragment_center_%dclass_model_extended.mat", k)))$models # plot classes col = brewer.pal(3, "Set1") - # X11(width=17, height=10) - png(filename=file.path("results", "10xgenomics_PBMC_5k_classification_4", - sprintf("ctcf_motifs_10e-6_classification_sequences_%dclass.png", k)), - units="in", res=720, width=18, height=12) - m = matrix(1:10, nrow=5, ncol=2, byrow=F) + X11(width=26, height=12) + # png(filename=file.path("results", "test_1kb", + # sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), + # units="in", res=720, width=18, height=12) + m = matrix(1:24, nrow=6, ncol=4, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis - x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) - x.at = (x.lab + ncol(ref.open)) / 2 + x.lab = 
seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) axis(1, at=x.at, labels=x.lab) # y-axis is [0,1] for min/max signal - x.at = seq(0, 1, 0.5) - axis(2, at=x.at, labels=x.at) + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) } + # inlets with center row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(0.2))+0.05 top = bottom + 0.15 par(fig=c(left, right, bottom, top), new=T) - idx = 380:420 + idx = (516-1-20):(516+1+20) plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) # xaxis - x.at = 1:length(idx) - axis(1, at=x.at, labels=x.at) + x.at = seq(1, length(idx), length.out = 3) + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + axis(1, at=x.at, labels=x.lab) # yaxis - x.at = seq(0, 2, by=1) - axis(2, at=x.at, labels=x.at) + axis(2, at=y.at, labels=y.lab) row_n = row_n + 1 if(i %% 5 == 0) { col_n = col_n + 1 row_n = 1 } } - dev.off() + # dev.off() } +m = matrix(1:24, nrow=6, ncol=4, byrow=F) +layout(m) +col=brewer.pal(3,"Set1") +for(i in 1:nrow(model.open)) +{ plot(model.open[i,]/max(model.open[i,]), type='l', lwd=2, col=col[1]) + lines(model.nucl[i,]/max(model.nucl[i,]), type='l', lwd=2, col=col[2]) +} diff --git a/scripts/10xgenomics_PBMC_5k_peaks_classification_7/classification_peaks.sh b/scripts/10xgenomics_PBMC_5k_peaks_classification_7/classification_peaks.sh new file mode 100755 index 0000000..9850d7a --- /dev/null +++ 
b/scripts/10xgenomics_PBMC_5k_peaks_classification_7/classification_peaks.sh @@ -0,0 +1,76 @@ + +# paths +## dir +data_dir_p="data/10xgenomics_PBMC_5k_peaks" +data_dir="data/10xgenomics_PBMC_5k" +pwm_dir="data/pwm/jaspar_2018_clustering/" +hg19_dir="data/genomes" +results_dir="results/10xgenomics_PBMC_5k_peaks_classification_7" +## matrix files +file_mat_open=$data_dir_p/'peaks_rmsk_openchromatin_1kb_read_atac.mat' +file_mat_nucl=$data_dir_p/'peaks_rmsk_nucleosomes_1kb_fragment_center.mat' +file_mat_seq=$data_dir_p/'peaks_rmsk_sequences_1kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='1' +n_shift='971' +n_core=24 +## PWM files +jun="$pwm_dir/cluster_3_node_23_20_motifs_prob.mat" +hif1a="$pwm_dir/cluster_4_node_31_3_motifs_prob.mat" +myc="$pwm_dir/cluster_4_node_22_4_motifs_prob.mat" +pu1="$pwm_dir/cluster_7_node_13_2_motifs_prob.mat" +cebpb="$pwm_dir/cluster_5_node_20_5_motifs_prob.mat" +irf4="$pwm_dir/cluster_31_node_4_5_motifs_prob.mat" +irf2="$pwm_dir/cluster_31_node_5_2_motifs_prob.mat" +lhx3="$pwm_dir/cluster_1_node_74_2_motifs_prob.mat" +foxh1="$pwm_dir/cluster_66_1_motifs_prob.mat" +sox3="$pwm_dir/cluster_33_node_1_2_motifs_prob.mat" +mef2c="$pwm_dir/cluster_20_4_motifs_prob.mat" +elf5="$pwm_dir/cluster_7_node_17_5_motifs_prob.mat" +stat6="$pwm_dir/cluster_32_node_STAT6_1_motifs_prob.mat" +nfe2="$pwm_dir/cluster_3_node_24_4_motifs_prob.mat" +ahr="$pwm_dir/cluster_4_node_30_2_motifs_prob.mat" +e2f2="$pwm_dir/cluster_39_node_1_2_motifs_prob.mat" +ctcf="$pwm_dir/cluster_48_node_ctcf_1_motifs_prob.mat" +klf="$pwm_dir/cluster_28_node_14_3_motifs_prob.mat" +nr4a1="$pwm_dir/cluster_2_node_12_4_motifs_prob.mat" +egr="$pwm_dir/cluster_28_node_13_4_motifs_prob.mat" +gata="$pwm_dir/cluster_21_node_5_6_motifs_prob.mat" +nfat="$pwm_dir/cluster_19_node_2_3_motifs_prob.mat" +runx="$pwm_dir/cluster_38_node_3_3_motifs_prob.mat" + +# classify +for k in 23 +do + ## results files + 
file_prob=$results_dir/'peaks_rmsk_sequences_1kb_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_openchromatin_1kb_read_atac_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_nucleosomes_1kb_fragment_center_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sequences_1kb_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --motifs $jun,$hif1a,$myc,$pu1,$cebpb,$irf4,$irf2,$lhx3,$foxh1,$sox3,$mef2c,$elf5,$stat6,$nfe2,$ahr,$e2f2,$ctcf,$klf,$nr4a1,$egr,$gata,$nfat,$runx --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + + # extend models + file_mod1_ext=$results_dir/'peaks_rmsk_openchromatin_1kb_read_atac_'$k'class_model_extended.mat' + file_mod2_ext=$results_dir/'peaks_rmsk_nucleosomes_1kb_fragment_center_'$k'class_model_extended.mat' + file_mod3_ext=$results_dir/'peaks_rmsk_sequences_1kb_'$k'class_model_extended.mat' + file_bed=$data_dir/'atac_v1_pbmc_5k_peaks_rmsk.bed' + file_fasta=$hg19_dir/'hg19.fasta' + file_bam_open=$data_dir/'atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam' + file_bai_open=$data_dir/'atac_v1_pbmc_5k_possorted_filtered_30-84bp.bam.bai' + file_bam_nucl=$data_dir/'atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam' + file_bai_nucl=$data_dir/'atac_v1_pbmc_5k_possorted_filtered_nucleosomes.bam.bai' + bin/ReadModelExtender --bed $file_bed --bam $file_bam_open --bai $file_bai_open --prob $file_prob --from -500 --to 500 --ext 1000 --binSize 1 --method 'read_atac' --thread $n_core > $file_mod1_ext + bin/ReadModelExtender --bed $file_bed --bam $file_bam_nucl --bai $file_bai_nucl --prob $file_prob --from -500 --to 
500 --ext 1000 --binSize 1 --method 'fragment_center' --thread $n_core > $file_mod2_ext + bin/SequenceModelExtender --bed $file_bed --fasta $file_fasta --prob $file_prob --from -500 --to 500 --ext 1000 --thread $n_core > $file_mod3_ext +done diff --git a/scripts/bulk_sequencing/analysis_cluster_ctcf_dnase_k562.R b/scripts/bulk_sequencing/analysis_cluster_ctcf_dnase_k562.R index 7377bed..6bf6201 100755 --- a/scripts/bulk_sequencing/analysis_cluster_ctcf_dnase_k562.R +++ b/scripts/bulk_sequencing/analysis_cluster_ctcf_dnase_k562.R @@ -1,138 +1,110 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) # functions source(file.path("scripts", "functions.R")) # data -data.1 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_1class_ref.mat")) -ref.1 = data.1$references +data.1 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_1class_ref.mat")) +ref.1 = data.1$models prob.1 = data.1$prob -aic.1 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_1class_aic.txt"))) +# aic.1 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_1class_aic.txt"))) data.1 = NULL -data.2 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_2class_ref.mat")) -ref.2 = data.2$references +data.2 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_2class_ref.mat")) +ref.2 = data.2$models prob.2 = data.2$prob -aic.2 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_2class_aic.txt"))) +# aic.2 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_2class_aic.txt"))) data.2 = NULL -data.3 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_3class_ref.mat")) -ref.3 = data.3$references +data.3 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_3class_ref.mat")) +ref.3 = data.3$models prob.3 = data.3$prob -aic.3 = 
as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_3class_aic.txt"))) +# aic.3 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_3class_aic.txt"))) data.3 = NULL -data.4 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_4class_ref.mat")) -ref.4 = data.4$references +data.4 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_4class_ref.mat")) +ref.4 = data.4$models prob.4 = data.4$prob -aic.4 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_4class_aic.txt"))) +# aic.4 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_4class_aic.txt"))) data.4 = NULL -data.5 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_5class_ref.mat")) -ref.5 = data.5$references +data.5 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_5class_ref.mat")) +ref.5 = data.5$models prob.5 = data.5$prob -aic.5 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_5class_aic.txt"))) +# aic.5 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_5class_aic.txt"))) data.5 = NULL -data.6 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_6class_ref.mat")) -ref.6 = data.6$references +data.6 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_6class_ref.mat")) +ref.6 = data.6$models prob.6 = data.6$prob -aic.6 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_6class_aic.txt"))) +# aic.6 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_6class_aic.txt"))) data.6 = NULL -data.7 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_7class_ref.mat")) -ref.7 = data.7$references +data.7 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_7class_ref.mat")) +ref.7 = data.7$models prob.7 = data.7$prob 
-aic.7 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_7class_aic.txt"))) +# aic.7 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_7class_aic.txt"))) data.7 = NULL -data.8 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_8class_ref.mat")) -ref.8 = data.8$references +data.8 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_8class_ref.mat")) +ref.8 = data.8$models prob.8 = data.8$prob -aic.8 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_8class_aic.txt"))) +# aic.8 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_8class_aic.txt"))) data.8 = NULL -data.9 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_9class_ref.mat")) -ref.9 = data.9$references +data.9 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_9class_ref.mat")) +ref.9 = data.9$models prob.9 = data.9$prob -aic.9 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_9class_aic.txt"))) +# aic.9 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_9class_aic.txt"))) data.9 = NULL -data.10 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_10class_ref.mat")) -ref.10 = data.10$references +data.10 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_10class_ref.mat")) +ref.10 = data.10$models prob.10 = data.10$prob -aic.10 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_10class_aic.txt"))) +# aic.10 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_10class_aic.txt"))) data.10 = NULL -data.11 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_11class_ref.mat")) -ref.11 = data.11$references -prob.11 = data.11$prob -aic.11 = as.matrix(read.table(file.path("results", "bulk_sequencing", 
"ctcf_dnase_k562_11class_aic.txt"))) -data.11 = NULL - -data.12 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_12class_ref.mat")) -ref.12 = data.12$references -prob.12 = data.12$prob -aic.12 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_12class_aic.txt"))) -data.12 = NULL - -data.13 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_13class_ref.mat")) -ref.13 = data.13$references -prob.13 = data.13$prob -aic.13 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_13class_aic.txt"))) -data.13 = NULL - -data.14 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_14class_ref.mat")) -ref.14 = data.14$references -prob.14 = data.14$prob -aic.14 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_14class_aic.txt"))) -data.14 = NULL - -data.15 = read.references(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_15class_ref.mat")) -ref.15 = data.15$references -prob.15 = data.15$prob -aic.15 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_15class_aic.txt"))) -data.15 = NULL - -ref = list(ref.15, ref.14, ref.13, ref.12, ref.11, ref.10, ref.9, ref.8, ref.7, ref.6, ref.5, ref.4, ref.3, ref.2, ref.1) -prob = list(prob.15, prob.14, prob.13, prob.12, prob.11, prob.10, prob.9, prob.8, prob.7, prob.6, prob.5, prob.4, prob.3, prob.2,prob.1) -aic = c(aic.15, aic.14, aic.13, aic.12, aic.11, aic.10, aic.9, aic.8, aic.7, aic.6, aic.5, aic.4, aic.3, aic.2, aic.1) +ref = list(ref.10, ref.9, ref.8, ref.7, ref.6, ref.5, ref.4, ref.3, ref.2, ref.1) +prob = list(prob.10, prob.9, prob.8, prob.7, prob.6, prob.5, prob.4, prob.3, prob.2,prob.1) +# aic = c(aic.15, aic.14, aic.13, aic.12, aic.11, aic.10, aic.9, aic.8, aic.7, aic.6, aic.5, aic.4, aic.3, aic.2, aic.1) +aic = rep(0, length(ref)) # number of runs n_run = length(ref) # number of different classes overall n_class_tot = 
sum(unlist(lapply(ref, nrow))) # max value of K n_class_max = max(unlist(lapply(ref, nrow))) # some colors colors = rep(brewer.pal(9, "Set1")[1], n_class_max) # construct a matrix with all discovered references on the rows references = matrix(nrow=n_class_tot, ncol=ncol(ref[[1]])) run_value = vector(length=n_class_tot) k_value = vector(length=n_class_tot) probabilities = vector(length=n_class_tot) k = 1 for(i in 1:n_run) { for(j in 1:nrow(ref[[i]])) { references[k,] = ref[[i]][j,] probabilities[k] = prob[[i]][j] run_value[k] = i k_value[k] = j k = k + 1 } } # distance matrix between all references -distances = distance.ref(references) +distances = distance.model(references) rownames(distances) = 1:nrow(distances) colnames(distances) = 1:ncol(distances) + plot.references(file.path("results","bulk_sequencing", "ctcf_dnase.png"), references, probabilities, colors, aic, distances, n_run, run_value, n_class_max) diff --git a/scripts/bulk_sequencing/analysis_cluster_ctcf_mnase_k562.R b/scripts/bulk_sequencing/analysis_cluster_ctcf_mnase_k562.R index 20bc1dd..8c2c613 100755 --- a/scripts/bulk_sequencing/analysis_cluster_ctcf_mnase_k562.R +++ b/scripts/bulk_sequencing/analysis_cluster_ctcf_mnase_k562.R @@ -1,138 +1,108 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) # functions source(file.path("scripts", "functions.R")) # data -data.1 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_1class_ref.mat")) -ref.1 = data.1$references +data.1 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_1class_ref.mat")) +ref.1 = data.1$models prob.1 = data.1$prob -aic.1 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_1class_aic.txt"))) +# aic.1 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_1class_aic.txt"))) data.1 = NULL -data.2 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_2class_ref.mat")) -ref.2 = 
data.2$references +data.2 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_2class_ref.mat")) +ref.2 = data.2$models prob.2 = data.2$prob -aic.2 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_2class_aic.txt"))) +# aic.2 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_2class_aic.txt"))) data.2 = NULL -data.3 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_3class_ref.mat")) -ref.3 = data.3$references +data.3 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_3class_ref.mat")) +ref.3 = data.3$models prob.3 = data.3$prob -aic.3 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_3class_aic.txt"))) +# aic.3 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_3class_aic.txt"))) data.3 = NULL -data.4 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_4class_ref.mat")) -ref.4 = data.4$references +data.4 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_4class_ref.mat")) +ref.4 = data.4$models prob.4 = data.4$prob -aic.4 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_4class_aic.txt"))) +# aic.4 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_4class_aic.txt"))) data.4 = NULL -data.5 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_5class_ref.mat")) -ref.5 = data.5$references +data.5 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_5class_ref.mat")) +ref.5 = data.5$models prob.5 = data.5$prob -aic.5 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_5class_aic.txt"))) +# aic.5 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_5class_aic.txt"))) data.5 = NULL -data.6 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_6class_ref.mat")) 
-ref.6 = data.6$references +data.6 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_6class_ref.mat")) +ref.6 = data.6$models prob.6 = data.6$prob -aic.6 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_6class_aic.txt"))) +# aic.6 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_6class_aic.txt"))) data.6 = NULL -data.7 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_7class_ref.mat")) -ref.7 = data.7$references +data.7 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_7class_ref.mat")) +ref.7 = data.7$models prob.7 = data.7$prob -aic.7 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_7class_aic.txt"))) +# aic.7 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_7class_aic.txt"))) data.7 = NULL -data.8 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_8class_ref.mat")) -ref.8 = data.8$references +data.8 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_8class_ref.mat")) +ref.8 = data.8$models prob.8 = data.8$prob -aic.8 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_8class_aic.txt"))) +# aic.8 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_8class_aic.txt"))) data.8 = NULL -data.9 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_9class_ref.mat")) -ref.9 = data.9$references +data.9 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_9class_ref.mat")) +ref.9 = data.9$models prob.9 = data.9$prob -aic.9 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_9class_aic.txt"))) +# aic.9 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_9class_aic.txt"))) data.9 = NULL -data.10 = read.references(file.path("results", "bulk_sequencing", 
"ctcf_mnase_k562_10class_ref.mat")) -ref.10 = data.10$references +data.10 = read.read.models(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_10class_ref.mat")) +ref.10 = data.10$models prob.10 = data.10$prob -aic.10 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_10class_aic.txt"))) +# aic.10 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_10class_aic.txt"))) data.10 = NULL -data.11 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_11class_ref.mat")) -ref.11 = data.11$references -prob.11 = data.11$prob -aic.11 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_11class_aic.txt"))) -data.11 = NULL - -data.12 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_12class_ref.mat")) -ref.12 = data.12$references -prob.12 = data.12$prob -aic.12 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_12class_aic.txt"))) -data.12 = NULL - -data.13 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_13class_ref.mat")) -ref.13 = data.13$references -prob.13 = data.13$prob -aic.13 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_13class_aic.txt"))) -data.13 = NULL - -data.14 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_14class_ref.mat")) -ref.14 = data.14$references -prob.14 = data.14$prob -aic.14 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_14class_aic.txt"))) -data.14 = NULL - -data.15 = read.references(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_15class_ref.mat")) -ref.15 = data.15$references -prob.15 = data.15$prob -aic.15 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_mnase_k562_15class_aic.txt"))) -data.15 = NULL - -ref = list(ref.15, ref.14, ref.13, ref.12, ref.11, ref.10, ref.9, ref.8, ref.7, ref.6, ref.5, ref.4, ref.3, ref.2, ref.1) -prob = 
list(prob.15, prob.14, prob.13, prob.12, prob.11, prob.10, prob.9, prob.8, prob.7, prob.6, prob.5, prob.4, prob.3, prob.2,prob.1) -aic = c(aic.15, aic.14, aic.13, aic.12, aic.11, aic.10, aic.9, aic.8, aic.7, aic.6, aic.5, aic.4, aic.3, aic.2, aic.1) +ref = list(ref.10, ref.9, ref.8, ref.7, ref.6, ref.5, ref.4, ref.3, ref.2, ref.1) +prob = list(prob.10, prob.9, prob.8, prob.7, prob.6, prob.5, prob.4, prob.3, prob.2, prob.1) +# aic = c(aic.15, aic.14, aic.13, aic.12, aic.11, aic.10, aic.9, aic.8, aic.7, aic.6, aic.5, aic.4, aic.3, aic.2, aic.1) +aic = rep(0, length(ref)) # number of runs n_run = length(ref) # number of different classes overall n_class_tot = sum(unlist(lapply(ref, nrow))) # max value of K n_class_max = max(unlist(lapply(ref, nrow))) # some colors colors = rep(brewer.pal(9, "Set1")[2], n_class_max) # construct a matrix with all discovered references on the rows references = matrix(nrow=n_class_tot, ncol=ncol(ref[[1]])) run_value = vector(length=n_class_tot) k_value = vector(length=n_class_tot) probabilities = vector(length=n_class_tot) k = 1 for(i in 1:n_run) { for(j in 1:nrow(ref[[i]])) { references[k,] = ref[[i]][j,] probabilities[k] = prob[[i]][j] run_value[k] = i k_value[k] = j k = k + 1 } } # distance matrix between all references -distances = distance.ref(references) +distances = distance.model(references) rownames(distances) = 1:nrow(distances) colnames(distances) = 1:ncol(distances) - plot.references(file.path("results","bulk_sequencing", "ctcf_mnase.png"), references, probabilities, colors, aic, distances, n_run, run_value, n_class_max) diff --git a/scripts/bulk_sequencing/cluster_ctcf_dnase_k562.sh b/scripts/bulk_sequencing/cluster_ctcf_dnase_k562.sh index 4414100..8d5d94b 100755 --- a/scripts/bulk_sequencing/cluster_ctcf_dnase_k562.sh +++ b/scripts/bulk_sequencing/cluster_ctcf_dnase_k562.sh @@ -1,23 +1,22 @@ results_dir='results/bulk_sequencing' data_dir='data/bulk_sequencing/' mkdir -p $results_dir 
-file_mnase=$data_dir'/ctcf_dnase_k562.mat' +file_dnase=$data_dir'/ctcf_dnase_k562.mat' file_seed=$results_dir'/ctcf_dnase_k562_seed.txt' n_iter='20' n_shift='21' -seeding='random' -n_core=5 +n_core=6 -for k in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ctcf_dnase_k562_'$k'class_prob.mat4d' file_ref=$results_dir/'ctcf_dnase_k562_'$k'class_ref.mat' file_aic=$results_dir/'ctcf_dnase_k562_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --data $file_mnase --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --parallel $n_core > $file_prob - bin/probToRef --data $file_mnase --prob $file_prob --parallel $n_core 1> $file_ref 2> $file_aic + bin/EMRead --read $file_dnase --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_dnase --prob $file_prob --thread $n_core 1> $file_ref 2> $file_aic done diff --git a/scripts/bulk_sequencing/cluster_ctcf_mnase_k562.sh b/scripts/bulk_sequencing/cluster_ctcf_mnase_k562.sh index 29779c0..1c355a7 100755 --- a/scripts/bulk_sequencing/cluster_ctcf_mnase_k562.sh +++ b/scripts/bulk_sequencing/cluster_ctcf_mnase_k562.sh @@ -1,23 +1,22 @@ results_dir='results/bulk_sequencing' data_dir='data/bulk_sequencing/' mkdir -p $results_dir file_mnase=$data_dir'/ctcf_mnase_k562.mat' file_seed=$results_dir'/ctcf_mnase_k562_seed.txt' n_iter='20' n_shift='21' -seeding='random' -n_core=5 +n_core=6 -for k in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +for k in 1 2 3 4 5 6 7 8 9 10 do seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) file_prob=$results_dir/'ctcf_mnase_k562_'$k'class_prob.mat4d' file_ref=$results_dir/'ctcf_mnase_k562_'$k'class_ref.mat' file_aic=$results_dir/'ctcf_mnase_k562_'$k'class_aic.txt' echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --data $file_mnase --class $k --shift 
$n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --parallel $n_core > $file_prob - bin/probToRef --data $file_mnase --prob $file_prob --parallel $n_core 1> $file_ref 2> $file_aic + bin/EMRead --read $file_mnase --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mnase --prob $file_prob --thread $n_core 1> $file_ref 2> $file_aic done diff --git a/scripts/functions.R b/scripts/functions.R index acfa474..0345e93 100644 --- a/scripts/functions.R +++ b/scripts/functions.R @@ -1,696 +1,422 @@ #' Reads a read density model file and returns a list #' with the class models and the associated #' class probabilities. #' \param file the path to the file of interest. #' \return a list of two elements : "models" #' a matrix with the class models on each row #' and "prob" the associated class probabilities. #' read.read.models = function(file) { mod = as.matrix(read.table(file), drop=F) prob = mod[,1] mod = mod[,-1, drop=F] rownames(mod) = paste("class", 1:nrow(mod)) colnames(mod) = 1:ncol(mod) return(list(models=mod, prob=prob)) } #' Reads a sequence model file and returns a list #' with the class models and the associated #' class probabilities. #' \param file the path to the file of interest. #' \return a list of two elements : "models" #' an array containing the models as probability #' matrices with the following dimensions : #' 1) 4 for A,C,G,T #' 2) the model length #' 3) the numbler of classes #' and "prob" the associated class probabilities. 
#' read.sequence.models = function(file) { data = as.matrix(read.table(file.path(file))) - prob = unique(data[,1]) + # prob = unique(data[,1]) + prob = data[,1][rep(c(T,F,F,F), rep=nrow(data)/4)] n_class = length(prob) l_model = ncol(data) - 1 n_row = 4 models = array(dim=c(n_row, l_model, n_class)) dimnames(models)[[1]] = c('A', 'C', 'G', 'T') dimnames(models)[[2]] = 1:l_model dimnames(models)[[3]] = paste("class" , 1:n_class) i_from = 1 i_to = i_from + n_row - 1 for(k in 1:n_class) { models[,,k] = data[i_from:i_to,-1] i_from = i_to + 1 i_to = i_from + n_row - 1 } return(list(models=models, prob=prob)) } #' Computes the reverse complement of a #' DNA motif. #' \param the motif of interest with #' A,C,G,T on the rows and the positions #' on the columns. #' \return the reverse complement motif. #' \author Romain Groux reverse.complement = function(motif) { n.row = nrow(motif) n.col = ncol(motif) motif.rev = matrix(nrow=n.row, ncol=n.col) for(i in 1:n.row) { for(j in 1:n.col) { i_rev = n.row - i + 1 j_rev = n.col - j + 1 motif.rev[i_rev,j_rev] = motif[i,j] } } return(motif.rev) } #' Computes the Kullback-Leibler #' divergence of a given distristribution #' x to its corresponding uniform #' counterpart. #' For instance c(0.7, 0.1, 0.1, 0.1) #' will be compared to #' c(0.25, 0.25, 0.25, 0.25) #' \param x a vector containing the #' probability mass function values of #' the distribution for all possible #' values. #' \return the Kullback-Leibler #' divergence kl.divergence = function(x) { kl = 0 p0 = 1 / length(x) for(i in x) { kl = kl + (i * log(i/p0)) } return(kl) } #' A function to plot a DNA logo of a letter probability #' matrix (pwm). In essence, it does exactly the same #' as seqLogo::seqLogo except that it does not need #' a new display device on its own. #' \param pwm the letter probability matrix. #' \param path.a the path to a file containing #' the image to display for the A character, #' in PNG format. 
#' \param path.c the path to a file containing #' the image to display for the C character, #' in PNG format. #' \param path.g the path to a file containing #' the image to display for the G character, #' in PNG format. #' \param path.t the path to a file containing #' the image to display for the T character, #' in PNG format. #' \param pseudocounts a pseudocounts to add to #' the probabilities to avoid 0's. #' \param ... additional plotting parameters for #' plot(). #' \author Romain Groux plot.logo = function(pwm, path.a, path.c, path.g, path.t, pseudocounts=10e-10, ...) { n.row = 4 n.col = ncol(pwm) if(nrow(pwm) != n.row) { stop("Error! pwm should have 4 rows!") } if(length(dim(pwm)) != 2) { stop("Error! pwm should be a matrix!") } # images for nucleotides require(png) image.a = readPNG(path.a) image.c = readPNG(path.c) image.g = readPNG(path.g) image.t = readPNG(path.t) # add pseudo-counts to avoid 0's pwm = pwm + pseudocounts for(j in 1:n.col) { pwm[,j] = pwm[,j] / sum(pwm[,j]) } # entropy h = rep(0, n.col) for(j in 1:n.col) { for(i in 1:n.row) { h[j] = h[j] - pwm[i,j] * log2(pwm[i,j]) } } # information content r = -h + log2(4) # height heights = matrix(nrow=n.row, ncol=n.col, data=0) for(i in 1:n.row) { for(j in 1:n.col) { heights[i,j] = pwm[i,j] * r[j] } } # compute coordinates x.coord = matrix(nrow=2, ncol=n.col, data=0) rownames(x.coord) = c("from", "to") for(i in 1:n.col) { x.coord[1,i] = i - 0.5 x.coord[2,i] = i + 0.5 } # plot x.lim = c(1,n.col) y.lim = c(0,2) x.at = 1:n.col plot(0, 0, col=0, xlim=x.lim, ylim=y.lim, bty='n', xaxt='n', yaxt='n', xlab="", ylab="", ...) 
# axis(1, at=x.at, labels=x.at) for(j in 1:n.col) { # highest at top ord = order(heights[,j], decreasing=F) x_left = x.coord[1,j] x_right = x.coord[2,j] y_curr = 0 for(i in ord) { height = heights[i,j] y_bottom = y_curr y_top = y_bottom + height if(i == 1) { rasterImage(image.a, x_left, y_bottom, x_right, y_top) } if(i == 2) { rasterImage(image.c, x_left, y_bottom, x_right, y_top) } if(i == 3) { rasterImage(image.g, x_left, y_bottom, x_right, y_top) } if(i == 4) { rasterImage(image.t, x_left, y_bottom, x_right, y_top) } y_curr = y_curr + height } } } #' Compute the euclidean distance between two models. #' It also check if a reference is in reverse orientation #' and returns the smallest distance value. #' \param ref1 a vector containing the first reference. #' \param ref2 a vector containing the second reference. #' \return the euclidean distance. eucl.dist.models = function(mod1, mod2) { return(min(sqrt(sum(((mod1 - mod2 ) ^ 2))), sqrt(sum(((mod1 - rev(mod2)) ^ 2))))) } #' Compute the correlation distance between two models. #' It also check if a reference is in reverse orientation #' and returns the smallest distance value. #' \param ref1 a vector containing the first reference. #' \param ref2 a vector containing the second reference. #' \return the euclidean distance. cor.dist.models= function(mod1, mod2) { return(1 - min(cor(mod1, mod2 ), cor(mod1, rev(mod2)))) } #' Computes the (eucliden) distance matrix for all the given #' the models As some models may be in reverse #' orientation compared to others, the distance in both #' orientation is computed, for each pair, and the best is #' returned. #' \param models a matrix with the models on each row. #' \return a matrix containing the distances between each reference. 
distance.model = function(models) { n = nrow(models) d = matrix(nrow=n, ncol=n, data=0) for(i in 1:n) { for(j in 1:i) { x = eucl.dist.models(models[i,], models[j,]) d[i,j] = x d[j,i] = x } } return(d) } get_matches = function(distances, run_value) { matches = matrix(nrow=0, ncol=4) # references of run i on the row -> y coord # references of run j on the col -> x coord # run labels run_i = 1 # run_j = 2 for(run_j in setdiff(unique(run_value), run_i)) { # number of references in each run n_i = length(which(run_value == run_i)) n_j = length(which(run_value == run_j)) index_i = which(run_value == run_i) # rows of run i index_j = which(run_value == run_j) # columns of run j i_taken = c() # classes of i already matched -> rows to ignore j_taken = c() # classes of j already matched -> columns to ignore # while not all classes in j have been assigned a best match row_n = 1 while(length(j_taken) < n_j) { if(length(i_taken) == 0 && length(j_taken) == 0) { distances_tmp = distances[index_i, index_j, drop=F] coord = which(distances_tmp == min(distances_tmp), arr.ind=T) coord_i = as.numeric(rownames(distances_tmp)[coord[1]]) coord_j = as.numeric(colnames(distances_tmp)[coord[2]]) coord = c(coord_i, coord_j) } else { rows = setdiff(index_i, i_taken) cols = setdiff(index_j, j_taken) distances_tmp = distances[rows, cols, drop=F] coord = which(distances_tmp == min(distances_tmp), arr.ind=T) coord_i = as.numeric(rownames(distances_tmp)[coord[1]]) coord_j = as.numeric(colnames(distances_tmp)[coord[2]]) coord = c(coord_i, coord_j) } coord = c(coord, row_n, run_j) i_taken = c(i_taken, coord[1]) j_taken = c(j_taken, coord[2]) matches = rbind(matches, coord) row_n = row_n + 1 } } return(matches) } #'Creates a composite figure in which several class references from #'several partitions, with different numbers of classes, are plotted. #'The figure is composed of a matrix of rows and #'columns where is the highest number of classes in all #'partitions and the number of different partition. 
T #'The first column will contain the references of the #'partition with classes. The next columns will contain the #'references of the partition with the second biggest number of #'classes (and so on). In a given column, except the 1st one, #'the references are ordered (over the rows) such that the #'overall similarity (euclidean distance) with the 1st column #'references are maximized. #'\param file the file name where the image will be saved. #'\param references a matrix with the different references to draw on #'each row. #'\param references a vector containing the class probability (or weight) associated #'to each corresponding reference (row) in matrix. #'\param probabilities a vector of values that will be displayed atop of each #'column of plots. #'\param colors a vector of colors to draw the class profiles. There should #'be colors, they can be the same. #'\param distances a distance matrix containing the distance between all #'references. The row and column labels have to be the row and column #'number (1, 2, 3, ...)! #'\param n_run the total number of different partitions to which all #'references belong. #'\param run_value a vector indicating to which partition each reference #'(row of references) belong to. 
It should be a simple vector of integers, #'for instance 1,1,1,1,2,2,2,3,3 #'\param n_class_max, the highest number of classes searches in all partitions () plot.references = function(file, references, probabilities, colors, col.titles, distances, n_run, run_value, n_class_max, width=15, height=18) { # compute the best matches between all references to 1st run references matches = get_matches(distances, run_value) # make a matrix for layout with good plot numbers plots.lab = matrix(nrow=n_class_max+1, ncol=n_run) # the 1st row will be filled last with only text (col.titles) plots.lab[1,] = (length(plots.lab) - ncol(plots.lab) + 1) : length(plots.lab) plots.lab[-1,1] = 1:n_class_max # for run with max number of classes z = n_class_max + 1 for(i in 1:nrow(matches)) { coord = matches[i,] # plots.lab[coord[3], coord[4]] = z plots.lab[coord[1]+1, coord[4]] = z z = z + 1 } # these will be the empty plots for(i in 1:nrow(plots.lab)) { for(j in 1:ncol(plots.lab)) { if(is.na(plots.lab[i,j])) { plots.lab[i,j] = z z = z + 1 } } } # plot - png(filename=file, width=width, height=height, unit="in", res=720) + if(!is.null(file)) + { png(filename=file, width=width, height=height, unit="in", res=720) } + else + { X11(width=width, height=height) } # a grid m = layout(mat = plots.lab, heights=c(0.3, rep(1, nrow(plots.lab)-1)) ) layout.show(m) x = 1:ncol(references) # plot references of partition with highest number of classes for(i in 1:n_class_max) { plot(x=x, y=references[i,], lwd=2, type='l', ylim=c(0, 1.2*max(references[i,])), col=colors[i], main="", xlab="pos [bp]", ylab="Nb reads") # prob x_ = 0.85*length(references[i,]) y_ = max(references[i,]) lab = round(probabilities[i],3) text(x=x_, y=y_, labels=lab, cex=1.2) } # plot others for(i in 1:nrow(matches)) { ref_index = matches[i,2] col_index = matches[i,3] plot(x=x, y=references[ref_index,], lwd=2, type='l', ylim=c(0, 1.2*max(references[ref_index,])), col=colors[col_index], main="", xlab="pos [bp]", ylab="Nb reads") # prob x_ = 
0.85*length(references[ref_index,]) y_ = max(references[ref_index,]) lab = round(probabilities[ref_index],3) text(x=x_, y=y_, labels=lab, cex=1.2) } # empty plots for(i in (length(run_value)+1):(n_run*n_class_max)) { plot(1,1,xlab="", ylab="", main="", col=0, xaxt="n", yaxt="n", bty="n") } # col titles p = par(mar=c(0,0,0,0)) for(i in 1:length(col.titles)) { plot(1,1,xlab="", ylab="", main="", col=0, xaxt="n", yaxt="n", bty="n") text(1,1, labels=col.titles[i], cex=2) } par(p) - dev.off() -} - - - - -plot.references.2 = function(file, - references, - probabilities, - colors, - col.titles, - distances, - n_run, - run_value, - n_class_max, - width=15, - height=18) -{ - # compute the best matches between all references to 1st run references - matches = get_matches(distances, run_value) - - # make a matrix for layout with good plot numbers - plots.lab = matrix(nrow=n_class_max+1, ncol=n_run) # the 1st row will be filled last with only text (col.titles) - plots.lab[1,] = (length(plots.lab) - ncol(plots.lab) + 1) : length(plots.lab) - plots.lab[-1,1] = 1:n_class_max # for run with max number of classes - z = n_class_max + 1 - for(i in 1:nrow(matches)) - { coord = matches[i,] - # plots.lab[coord[3], coord[4]] = z - plots.lab[coord[1]+1, coord[4]] = z - z = z + 1 - } - # these will be the empty plots - for(i in 1:nrow(plots.lab)) - { for(j in 1:ncol(plots.lab)) - { if(is.na(plots.lab[i,j])) - { plots.lab[i,j] = z - z = z + 1 - } - } - } - - # plot - if(is.null(file)) - { X11(width=width, height=height) } - else - { png(filename=file, width=width, height=height, unit="in", res=720) } - # a grid - m = layout(mat = plots.lab, heights=c(0.3, rep(1, nrow(plots.lab)-1)) ) - # layout.show(m) - x = 1:ncol(references[[1]]) - - # plot references of partition with highest number of classes - for(i in 1:n_class_max) - { for(j in 1:length(references)) - { - ylim = c(0, 1.2) - if(j == 1) - { plot(x=x, y=references[[j]][i,]/max(references[[j]][i,]), - lwd=2, type='l', ylim=ylim, - 
col=colors[j], main="", xlab="pos [bp]", ylab="Nb reads") - } - else - { lines(x=x, y=references[[j]][i,]/max(references[[j]][i,]), - lwd=2, type='l', col=colors[j]) - } - } - - # prob - x_ = 0.85*length(references[[1]][i,]) - # y_ = max(references[[1]][i,]) - y_ = 0.85 - lab = round(probabilities[i],3) - text(x=x_, y=y_, labels=lab, cex=1.2) - } - - # plot others - for(i in 1:nrow(matches)) - { ref_index = matches[i,2] - col_index = matches[i,3] - for(j in 1:length(references)) - { ylim = c(0, 1.2) - if(j == 1) - { plot(x=x, y=references[[j]][ref_index,]/max(references[[j]][ref_index,]), - lwd=2, type='l', ylim=ylim, - col=colors[j], main="", xlab="pos [bp]", ylab="Nb reads") - } - else - { lines(x=x, y=references[[j]][ref_index,]/max(references[[j]][ref_index,]), - lwd=2, col=colors[j]) - } - } - # prob - x_ = 0.85*length(references[[1]][ref_index,]) - # y_ = max(references[[1]][ref_index,]) - y_ = 0.85 - lab = round(probabilities[ref_index],3) - text(x=x_, y=y_, labels=lab, cex=1.2) - } - - # empty plots - for(i in (length(run_value)+1):(n_run*n_class_max)) - { plot(1,1,xlab="", ylab="", main="", col=0, xaxt="n", yaxt="n", bty="n") } - - # col titles - p = par(mar=c(0,0,0,0)) - for(i in 1:length(col.titles)) - { plot(1,1,xlab="", ylab="", main="", col=0, xaxt="n", yaxt="n", bty="n") - text(1,1, labels=col.titles[i], cex=2) - } - par(p) - if(!is.null(file)) - { dev.off() } -} - - -plot.references.3 = function(file, - references, - probabilities, - colors, - col.titles, - distances, - n_run, - run_value, - n_class_max, - width=15, - height=18) -{ - # compute the best matches between all references to 1st run references - matches = get_matches(distances, run_value) - - # make a matrix for layout with good plot numbers - plots.lab = matrix(nrow=n_class_max+1, ncol=n_run) # the 1st row will be filled last with only text (col.titles) - plots.lab[1,] = (length(plots.lab) - ncol(plots.lab) + 1) : length(plots.lab) - plots.lab[-1,1] = 1:n_class_max # for run with max 
number of classes - z = n_class_max + 1 - for(i in 1:nrow(matches)) - { coord = matches[i,] - # plots.lab[coord[3], coord[4]] = z - plots.lab[coord[1]+1, coord[4]] = z - z = z + 1 - } - # these will be the empty plots - for(i in 1:nrow(plots.lab)) - { for(j in 1:ncol(plots.lab)) - { if(is.na(plots.lab[i,j])) - { plots.lab[i,j] = z - z = z + 1 - } - } - } - - # plot - if(is.null(file)) - { X11(width=width, height=height) } - else - { png(filename=file, width=width, height=height, unit="in", res=720) } - - p = par(mar=c(0,0,0,0)) - - # a grid - m = layout(mat = plots.lab, heights=c(0.3, rep(1, nrow(plots.lab)-1)) ) - # layout.show(m) - x = 1:ncol(references[[1]]) - - # plot references of partition with highest number of classes - for(i in 1:n_class_max) - { for(j in 1:length(references)) - { - ylim = c(0, 1.2) - if(j == 1) - { plot(x=x, y=references[[j]][i,]/max(references[[j]][i,]), - lwd=2, type='l', ylim=ylim, - col=colors[j], main='', xlab='', ylab='', - xaxt='n', yaxt='n') - } - else - { lines(x=x, y=references[[j]][i,]/max(references[[j]][i,]), - lwd=2, type='l', col=colors[j]) - } - } - - # prob - x_ = 0.85*length(references[[1]][i,]) - # y_ = max(references[[1]][i,]) - y_ = 0.85 - lab = round(probabilities[i],3) - text(x=x_, y=y_, labels=lab, cex=1.2) - } - - # plot others - for(i in 1:nrow(matches)) - { ref_index = matches[i,2] - col_index = matches[i,3] - for(j in 1:length(references)) - { ylim = c(0, 1.2) - if(j == 1) - { plot(x=x, y=references[[j]][ref_index,]/max(references[[j]][ref_index,]), - lwd=2, type='l', ylim=ylim, - col=colors[j], main='', xlab='', ylab='', - xaxt='n', yaxt='n') - } - else - { lines(x=x, y=references[[j]][ref_index,]/max(references[[j]][ref_index,]), - lwd=2, col=colors[j]) - } - } - # prob - x_ = 0.85*length(references[[1]][ref_index,]) - # y_ = max(references[[1]][ref_index,]) - y_ = 0.85 - lab = round(probabilities[ref_index],3) - text(x=x_, y=y_, labels=lab, cex=1.2) - } - - # empty plots - for(i in 
(length(run_value)+1):(n_run*n_class_max)) - { plot(1,1,xlab="", ylab="", main="", col=0, xaxt="n", yaxt="n", bty="n") } - - # col titles - for(i in 1:length(col.titles)) - { plot(1,1, xlab="", ylab="", main="", col=0, xaxt="n", yaxt="n", bty="n") - text(1,1, labels=col.titles[i], cex=2) - } - par(p) if(!is.null(file)) { dev.off() } } - - -plot.references.4 = function(file, - references, - probabilities, - colors, - width=15, - height=18) -{ - n_class = nrow(references[[1]]) - n_col = ncol(references[[1]]) - mat = matrix(nrow=n_class, ncol=1, data=1:n_class) - - # plot - if(is.null(file)) - { X11(width=width, height=height) } - else - { png(filename=file, width=width, height=height, unit="in", res=720) } - - p = par(mar=c(0,0,0,0)) - - # a grid - m = layout(mat = mat) - # layout.show(m) - x = 1:n_col - - for(i in 1:n_class) - { for(j in 1:length(references)) - { - ylim = c(0, 1.2) - if(j == 1) - { plot(x=x, y=references[[j]][i,]/max(references[[j]][i,]), - lwd=2, type='l', ylim=ylim, - col=colors[j], main='', xlab='', ylab='', - xaxt='n', yaxt='n') - } - else - { lines(x=x, y=references[[j]][i,]/max(references[[j]][i,]), - lwd=2, type='l', col=colors[j]) - } - } - # prob - x_ = 0.85*length(references[[1]][i,]) - # y_ = max(references[[1]][i,]) - y_ = 0.85 - lab = round(probabilities[i],3) - text(x=x_, y=y_, labels=lab, cex=1.2) - } - - if(!is.null(file)) - { dev.off() } -} - diff --git a/scripts/genomes/hg19.sh b/scripts/genomes/hg19.sh new file mode 100644 index 0000000..3510350 --- /dev/null +++ b/scripts/genomes/hg19.sh @@ -0,0 +1,64 @@ +data_dir=data/genomes + +mkdir $data_dir + + +# hg19 genome from Ensembl +## NOTE the hg19 genome was downloaded on the 13 of August 2019. 
+ +## download all chromosomes +file_fa=$data_dir/'hg19.fasta' +touch $file_fa +for chr in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y +do + file_gz=$data_dir/'hg19_chr_'$chr'.fasta.gz' + wget -O $file_gz ftp://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.chromosome.$chr.fa.gz + gzip -d -c $file_gz >> $file_fa + rm $file_gz +done + +## format sequence header to fit 'chrN' format +file_tmp=$data_dir/tmp.fasta +sed -E 's/ dna.+//' $file_fa | sed 's/>/>chr/' > $file_tmp +mv $file_tmp $file_fa + + + +# repeat masked hg19 genome (repeated elements are 'N') from Ensembl +## NOTE the hg19 repeated masked genome was downloaded on the 12 of August 2019. + +## download all chromosomes +file_fa_rm=$data_dir/'hg19_rmsk.fasta' +touch $file_fa_rm +for chr in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y +do + file_gz=$data_dir/'hg19_chr_'$chr'_rmsk.fasta.gz' + wget -O $file_gz ftp://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna_rm.chromosome.$chr.fa.gz + gzip -d -c $file_gz >> $file_fa_rm + rm $file_gz +done + +## format sequence header to fit 'chrN' format +file_tmp=$data_dir/tmp.fasta +sed -E 's/ dna.+//' $file_fa_rm | sed 's/>/>chr/' > $file_tmp +mv $file_tmp $file_fa_rm + + + +# repeated elements annotation from USCS +## NOTE +## data/genomes/hg19_rmsk_original.bed was downloaded from http://genome.ucsc.edu/cgi-bin/hgTables +## on the 9th of August 2019 with the following options : +## clade: mammal genome: Human assembly: hg19 +## group: repeat track: repeatMasker +## output file: hg19_rmsk_original.bed +## file type returned: gzip compressed +## "get output" button +## "get BED" button + +## sort by chromosome and position +sort -k 1,1V -k2,2n -k3,3n $data_dir/hg19_rmsk_original.bed > $data_dir/hg19_rmsk_original_sorted.bed +mv $data_dir/hg19_rmsk_original_sorted.bed $data_dir/hg19_rmsk_original.bed + +## only keep chr1/2.../M/X/Y +grep -E '^chr[0-9XYM]+\s' 
$data_dir/hg19_rmsk_original.bed > $data_dir/hg19_rmsk.bed diff --git a/scripts/install_libraries/install_libUnitTest++.sh b/scripts/install_libraries/install_libUnitTest++.sh new file mode 100644 index 0000000..dd39af5 --- /dev/null +++ b/scripts/install_libraries/install_libUnitTest++.sh @@ -0,0 +1,23 @@ +# install the boost library +library_dir='lib/UnitTest++' +lib_dir="$library_dir/lib" +include_dir="$library_dir/include" + +# download src +git clone https://github.com/unittest-cpp/unittest-cpp.git + +mkdir -p $library_dir +mkdir -p $lib_dir +mkdir -p $include_dir + +cd unittest-cpp/ + +# install +cmake3 . && make +find UnitTest++/ -name "*.cpp" -type f -delete +mv ./libUnitTest++.a ../$lib_dir/ +mv UnitTest++/* ../$include_dir/ + +# clean +cd .. +rm -rf unittest-cpp diff --git a/scripts/install_libraries/install_libboost.sh b/scripts/install_libraries/install_libboost.sh new file mode 100644 index 0000000..e8acfca --- /dev/null +++ b/scripts/install_libraries/install_libboost.sh @@ -0,0 +1,17 @@ +# install the boost library + +# download src +wget https://dl.bintray.com/boostorg/release/1.70.0/source/boost_1_70_0.tar.gz +tar -xzvf boost_1_70_0.tar.gz + +cd boost_1_70_0/ + +# build and install +mkdir -p $library_dir +./bootstrap.sh --prefix=$(pwd)/lib/boost +./b2 install link=static # program_options + +# clean +cd .. 
+rm -r boost_1_70_0 +rm boost_1_70_0.tar.gz diff --git a/scripts/install_libraries/run_all.sh b/scripts/install_libraries/run_all.sh index 6251f3d..46813b5 100644 --- a/scripts/install_libraries/run_all.sh +++ b/scripts/install_libraries/run_all.sh @@ -1,5 +1,17 @@ +# install libraries + +## C++ libraries mkdir lib/ -mkdir lib/include +scripts/install_libraries/install_libboost.sh +scripts/install_libraries/install_libSeqAn.sh +scripts/install_libraries/install_libUnitTest++.sh + +## python libraries +### make sure that pip is installed for python3.6 +# curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py +# sudo python3.6 get-pip.py --force-reinstall + +sudo pip3.6 install intervaltree +sudo pip3.6 install pysam -scripts/install_libraries/install_libStatGen.sh diff --git a/scripts/install_programs/install_bedtools.sh b/scripts/install_programs/install_bedtools.sh new file mode 100644 index 0000000..b06ab32 --- /dev/null +++ b/scripts/install_programs/install_bedtools.sh @@ -0,0 +1,15 @@ +# download +wget https://github.com/arq5x/bedtools2/releases/download/v2.28.0/bedtools-2.28.0.tar.gz +tar -zxvf bedtools-2.28.0.tar.gz +cd bedtools2 + +# compile +make + +# install +mkdir ../bin/bedtools +mv bin/* ../bin/bedtools + +# clean +cd .. 
+rm -r bedtools2 diff --git a/scripts/install_programs/install_deeptools.sh b/scripts/install_programs/install_deeptools.sh deleted file mode 100644 index 9df2c69..0000000 --- a/scripts/install_programs/install_deeptools.sh +++ /dev/null @@ -1,8 +0,0 @@ - -# make sure that pip is installed for python3.6 -# curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py -# sudo python3.6 get-pip.py --force-reinstall - -# install deeptools for python3.6 -sudo pip3.6 install deeptools - diff --git a/scripts/install_programs/run_all.sh b/scripts/install_programs/run_all.sh index fd23b47..686706e 100644 --- a/scripts/install_programs/run_all.sh +++ b/scripts/install_programs/run_all.sh @@ -1 +1,4 @@ -scripts/install_programs/install_deeptools.sh +# install programs + +scripts/install_programs/install_bedtools.sh + diff --git a/scripts/pwm/reformat_jaspar_2018.sh b/scripts/pwm/reformat_jaspar_2018.sh new file mode 100755 index 0000000..5101c91 --- /dev/null +++ b/scripts/pwm/reformat_jaspar_2018.sh @@ -0,0 +1,7 @@ +# NOTE +# +pwmtool_dir='scripts/pwm_tools' +data_dir="data/pwm/jaspar_2018" + +wget -O $data_dir/'JASPAR2018_CORE_vertebrates_non-redundant_pfms_jaspar.zip' http://jaspar.genereg.net/download/CORE/JASPAR2018_CORE_vertebrates_non-redundant_pfms_jaspar.txt +unzip -d $data_dir $data_dir/'JASPAR2018_CORE_vertebrates_non-redundant_pfms_jaspar.zip' diff --git a/scripts/pwm/reformat_jaspar_2018_clustering.sh b/scripts/pwm/reformat_jaspar_2018_clustering.sh new file mode 100755 index 0000000..efbf3c3 --- /dev/null +++ b/scripts/pwm/reformat_jaspar_2018_clustering.sh @@ -0,0 +1,9 @@ +pwmtool_dir='scripts/pwm_tools' +data_dir="data/pwm/jaspar_2018_clustering" + + +for file_tf in $(ls $data_dir/*tf) +do + file_prob=$(basename $file_tf | sed s/.tf/_prob.mat/) + python3.6 $pwmtool_dir/extract_transfac_pwm.py -i $file_tf --norm > $data_dir/$file_prob +done diff --git a/scripts/pwm_tools/extract_transfac_pwm.py b/scripts/pwm_tools/extract_transfac_pwm.py new file mode 100644 index 
0000000..2c7a189 --- /dev/null +++ b/scripts/pwm_tools/extract_transfac_pwm.py @@ -0,0 +1,70 @@ +import optparse +import os +import re + +def parse_file(file_in, norm, pseudocount): + + # matrix in vertical format + matrix_v = list() + with open(file_in) as f_in: + + # if currently reading the matrix + matrix = False + + for line in f_in: + line = line.rstrip() + # before matrix + if matrix is False: + if line.startswith("P0") or line.startswith("PO"): + matrix = True + # inside matrix + else: + # end of matrix + if re.match(r"^\d+", line) is None: + # if line.startswith("XX"): + matrix = False + break + # inside matrix + else: + values = re.split(r"\s+", line)[1:] + values = [float(x)+pseudocount for x in values] + if norm: + tot = sum(values) + values = [x/tot for x in values] + matrix_v.append(values) + # matrix in vertical format + return matrix_v + +if __name__ == "__main__": + + # parse options + usage = "usage: %s [options]" % os.path.basename(__file__) + epilog = "This program reads a transfac PWM file returns the PWM." 
\ + "Written by Romain Groux, August 2019" + parser = optparse.OptionParser(usage=usage, epilog=epilog) + parser.add_option("-i", "--input", dest="file_in", default=None, type="string", action="store", + help="The addresse of the transfac PWM file.") + parser.add_option("--norm", dest="norm", action="store_true", + help="Whether the values should be normalized to probabilities.") + (options, args) = parser.parse_args() + + file_in = options.file_in + norm = options.norm + + # matrix in vertical format + matrix_v = parse_file(file_in, norm, 1) + + # matrix in horizontal format + nrow = 4 + ncol = len(matrix_v) + matrix_h = ["" for _ in range(0, nrow, 1)] + for i in range(0, nrow, 1): + for j in range(0, ncol, 1): + matrix_h[i] += "%.4f\t" % matrix_v[j][i] + matrix_h[i] = matrix_h[i][:-1] + + # print matrix in horizontal format + print('\n'.join(matrix_h)) + + + diff --git a/scripts/run_all.sh b/scripts/run_all.sh index a698e65..539db86 100755 --- a/scripts/run_all.sh +++ b/scripts/run_all.sh @@ -1,11 +1,13 @@ -# install programs +# setup environment +## install programs scripts/install_programs/run_all.sh - -# install libraries +## install libraries scripts/install_libraries/run_all.sh + + # simulate data for testing purposes scripts/generate_toy_data/run_all.sh diff --git a/scripts/test.R b/scripts/test.R new file mode 100644 index 0000000..68eb808 --- /dev/null +++ b/scripts/test.R @@ -0,0 +1,284 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) + +# functions +source(file.path("scripts", "functions.R")) + +#' Converts a sequence in character format +#' to integer format A->0, C->1, N->2, G->3 +#' T->4. +#' \param seq a vector containing the sequence +#' in character format. +#' \return a vector containing the sequence +#' in integer format. 
+#' \author Romain Groux +char.to.int = function(seq) +{ seq_int = vector(length=length(seq)) + for(i in 1:length(seq)) + { if(seq[i] == 'A') { seq_int[i] = 0 } + if(seq[i] == 'C') { seq_int[i] = 1 } + if(seq[i] == 'N') { seq_int[i] = 2 } + if(seq[i] == 'G') { seq_int[i] = 3 } + if(seq[i] == 'T') { seq_int[i] = 4 } + } + return(seq_int) +} + +#' Generates the reverse complement of a kmer. +#' \param kmer a vector containing the kmer in +#' integer format. +#' \return a vector containing the reverse +#' complement kmer +#' \author Romain Groux +get_rev_compl = function(kmer) +{ kmer_rv = vector(length=length(kmer), mode="numeric") + i_rv = length(kmer) + for(i in 1:length(kmer)) + { if(kmer[i] == 0) { kmer_rv[i_rv] = 4 } # A + if(kmer[i] == 1) { kmer_rv[i_rv] = 3 } # C + if(kmer[i] == 2) { kmer_rv[i_rv] = 2 } # N + if(kmer[i] == 3) { kmer_rv[i_rv] = 1 } # G + if(kmer[i] == 4) { kmer_rv[i_rv] = 0 } # T + } + return(kmer_rv) +} + +#' Generates a hash given a kmer. +#' Kmers with a same length are guaranteed +#' to have different hashes. +#' AA..AA will generate a hash of 1, +#' AA..AC will generate a hash of 2, +#' AA..AN will generate a hash of 3, +#' AA..AG will generate a hash of 4, +#' AA..AT will generate a hash of 5, +#' TT..TG will generate a hash of 5**k - 1, +#' TT..TT will generate a hash of 5**k +#' \param seq a vector containing the kmer +#' in integer format : A->0, C->1, N->2, G->3, +#' T->4. +#' \return the kmer hash +#' \author Romain Groux +hash = function(seq) +{ k = length(seq) ; z = 5 + h = 0 + for(i in 0:(length(seq)-1)) + { if(seq[i+1] == 0) { h = h + (0*(z**(k-i-1))) } # A + if(seq[i+1] == 1) { h = h + (1*(z**(k-i-1))) } # C + if(seq[i+1] == 2) { h = h + (2*(z**(k-i-1))) } # N + if(seq[i+1] == 3) { h = h + (3*(z**(k-i-1))) } # G + if(seq[i+1] == 4) { h = h + (4*(z**(k-i-1))) } # T + } + return(h+1) +} + +#' Computes the hash of a sequence and of +#' its reverse complement and returns the +#' smallest one. 
+#' \param seq a vector containing the +#' sequence in integer format : : A->0, +#' C->1, N->2, G->3, T->4. +#' \author Romain Groux +hash.min(seq) +{ seq_r = get_rev_compl(seq) + return(min(hash(seq), hash(seq_r))) +} + +#' Generates all kmers for a given value of K +#' and return them in lexicographic order. +#' \param k the kmer length. +#' \return a matrix with the different kmers +#' on the rows and k columns. The kmers are +#' in integer format : A->0, C->1, N->2, G->3, +#' T->4. +#' \author Romain Groux +generate_all_kmers = function(k) +{ kmers = matrix(nrow=5**k, ncol=k, data=-1) + n = k + currentWord = rep(1, n) + i = 1 + while(n > 0) + { kmers[i,] = currentWord + i = i + 1 + while(n>0 && currentWord[n+1-1] == 5) + { currentWord[n] = 1 + n = n - 1 + } + if(n > 0) + { currentWord[n] = currentWord[n] + 1 + n = k + } + } + return(kmers - 1) +} + + +data = as.matrix(read.table(file.path("data", + "10xgenomics_PBMC_5k_peaks", + "peaks_rmsk_sampled_sequences_1kb.mat"))) + +data = as.matrix(read.table(file.path("data/toy_data/simulated_sequences_2class_flip.mat"))) +data = apply(data, 1, char.to.int) + +k = 5 +n_kmer = 5**k +hmax = ceiling(n_kmer / 2) +n_shift = ncol(data) - k + 1 + +# transitions and counts +counts = vector(length=n_kmer, mode="numeric") +kmers = generate_all_kmers(k) +counts = vector(length=n_kmer, mode="numeric") +t_out = matrix(nrow=n_kmer, ncol=n_kmer, data=0) +t_in = t_out +t_all = t_out +for(i in 1:nrow(data)) +{ for(j in 1:n_shift) + { # no in transition (1st kmer) + if(j == 1) + { # kmer1 < kmer2 + from1 = j ; to1 = from1 + k - 1 ; kmer1 = data[i,from1:to1] ; + from2 = j+1 ; to2 = from2 + k - 1 ; kmer2 = data[i,from2:to2] ; + kmer1r = get_rev_compl(kmer2) ; kmer2r = get_rev_compl(kmer1) ; + idx1 = hash(kmer1) ; idx1r = hash(kmer1r) ; + idx2 = hash(kmer2) ; idx2r = hash(kmer2r) ; + # out transition kmer1 -> kmer2 + t_out[idx1,idx2] = t_out[idx1,idx2] + 1 + t_out[idx1r,idx2r] = t_out[idx1r,idx2r] + 1 + # number of edges + t_all[idx1,idx2] 
= t_all[idx1,idx2] + 1 + t_all[idx2,idx1] = t_all[idx2,idx1] + 1 + t_all[idx1r,idx2r] = t_all[idx1r,idx2r] + 1 + t_all[idx2r,idx1r] = t_all[idx2r,idx1r] + 1 + # counts + counts[idx1] = counts[idx1] + 1 + counts[idx1r] = counts[idx1r] + 1 + } + # no out transition (last kmer) + else if(j == n_shift) + { # kmer1 < kmer2 + from1 = j-1 ; to1 = from1 + k - 1 ; kmer1 = data[i,from1:to1] ; + from2 = j ; to2 = from2 + k - 1 ; kmer2 = data[i,from2:to2] ; + kmer1r = get_rev_compl(kmer2) ; kmer2r = get_rev_compl(kmer1) ; + idx1 = hash(kmer1) ; idx1r = hash(kmer1r) ; + idx2 = hash(kmer2) ; idx2r = hash(kmer2r) ; + # in transition kmer1 <- kmer2 + t_in[idx1,idx2] = t_in[idx1,idx2] + 1 + t_in[idx1r,idx2r] = t_in[idx1r,idx2r] + 1 + # number of edges + t_all[idx1,idx2] = t_all[idx1,idx2] + 1 + t_all[idx2,idx1] = t_all[idx2,idx1] + 1 + t_all[idx1r,idx2r] = t_all[idx1r,idx2r] + 1 + t_all[idx2r,idx1r] = t_all[idx2r,idx1r] + 1 + # counts + # no need, kmer2 was counted at last iteration as kmer2 + } + # both out and in transitions (middle) + else + { # kmer0 < kmer1 < kmer2 + from0 = j ; to0 = from0 + k - 1 ; kmer0 = data[i,from0:to0] ; + from1 = j ; to1 = from1 + k - 1 ; kmer1 = data[i,from1:to1] ; + from2 = j+1 ; to2 = from2 + k - 1 ; kmer2 = data[i,from2:to2] ; + kmer0r = get_rev_compl(kmer2) ; kmer1r = get_rev_compl(kmer1) ; kmer2r = get_rev_compl(kmer0) ; + idx0 = hash(kmer0) ; idx0r = hash(kmer0r) ; + idx1 = hash(kmer1) ; idx1r = hash(kmer1r) ; + idx2 = hash(kmer2) ; idx2r = hash(kmer2r) ; + # out transition kmer1 -> kmer2 + t_out[idx1,idx2] = t_out[idx1,idx2] + 1 + t_out[idx1r,idx2r] = t_out[idx1r,idx2r] + 1 + # in transition kmer0 -> kmer1 + t_in[idx1,idx0] = t_in[idx1,idx0] + 1 + t_in[idx1r,idx0r] = t_in[idx1r,idx0r] + 1 + # number of edges + t_all[idx0,idx1] = t_all[idx0,idx1] + 1 + t_all[idx1,idx0] = t_all[idx1,idx0] + 1 + t_all[idx1,idx2] = t_all[idx1,idx2] + 1 + t_all[idx2,idx1] = t_all[idx2,idx1] + 1 + t_all[idx0r,idx1r] = t_all[idx0r,idx1r] + 1 + t_all[idx1r,idx0r] = 
t_all[idx1r,idx0r] + 1 + t_all[idx1r,idx2r] = t_all[idx1r,idx2r] + 1 + t_all[idx2r,idx1r] = t_all[idx2r,idx1r] + 1 + # counts + counts[idx1] = counts[idx1] + 1 + counts[idx1r] = counts[idx1r] + 1 + } + } +} + +# spectral clustering +# t_all is the affinity matrix +# compute the degree matrix +d = diag(apply(t_in, 1, sum)) # sum rows +# unormalized laplacian +u = d - t_in +# get eigen values and vectors +evL = eigen(u, symmetric=TRUE) +# plot eigen values +plot(1:20, rev(evL$values)[1:20], type='b') +# partition +partitions = list() +for(n_clust in 2:20) +{ print(n_clust) + # get K biggest eigen values and vectors -> embedding space + z = evL$vectors[,(ncol(evL$vectors)-n_clust+1):ncol(evL$vectors)] + partitions[[n_clust]] = kmeans(z, centers=n_clust, iter.max=100, nstart=10) +} + + +plot(evL$vectors[,3124:3125]) + +# motif 1 is ACGTTGCA +kmers_motif1 = matrix(ncol=k, + data=c(0,1,2,3,3, + 1,2,3,3,2, + 2,3,3,2,1, + 3,3,2,1,0), + byrow=T) +# motif 2 is GCGAATTT +kmers_motif2 = matrix(ncol=k, + data=c(2,1,2,0,0, + 1,3,0,0,3, + 3,0,0,3,3, + 0,0,3,3,3), + byrow=T) +idx1 = apply(kmers_motif1, 1, hash) +idx2 = apply(kmers_motif2, 1, hash) + +partitions[[2]]$size + +partitions[[2]]$cluster[idx1] +partitions[[2]]$cluster[idx2] + + + + +c1 = which(partitions[[2]]$cluster == 1) +c2 = which(partitions[[2]]$cluster == 2) + +plot(evL$vectors[,3124:3125], col=partitions[[2]]$cluster+1, cex=0.1) + +points(evL$vectors[idx1,3124:3125], col=2) +points(evL$vectors[idx2,3124:3125], col=3) + +par(mfrow=c(3,1)) +plot(t_all[idx1[1],], type='l', ylim=c(0,50)) ; abline(v=idx1, col="red", lwd=0.2) ; abline(v=idx1[1], col="blue") +plot(t_all[idx1[2],], type='l', ylim=c(0,50)) ; abline(v=idx1, col="red", lwd=0.2) +plot(t_all[idx1[3],], type='l', ylim=c(0,50)) ; abline(v=idx1, col="red", lwd=0.2) + + +boxplot(counts, counts[idx1], counts[idx2], outline=F) + + + + +# reconstruct kmers +best.k = 2 +partition = partitions[[best.k]] +clusters = partition$cluster +c1 = which(clusters == 1) +c2 = 
which(clusters == 2) +best1 = which.max(counts[c1]) +best2 = which.max(counts[c2]) + + diff --git a/scripts/test/analysis_test_sampled.R b/scripts/test/analysis_test_sampled.R new file mode 100644 index 0000000..afcf023 --- /dev/null +++ b/scripts/test/analysis_test_sampled.R @@ -0,0 +1,97 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) +library(seqLogo) + +# functions +source(file.path("scripts", "functions.R")) + +# the number of classes searched +n.classes = c(10, 20, 30) + +# path to the images for the logo +path.a = file.path("res/A.png") +path.c = file.path("res/C.png") +path.g = file.path("res/G.png") +path.t = file.path("res/T.png") + +################## sequence patterns around ctcf motifs ################## + +for(k in n.classes) +{ + # sequence + data = read.sequence.models(file.path("results", "test_1kb", + sprintf("peaks_rmsk_sampled_sequences_%dclass_model.mat", k))) + model.seq = data$models + model.prob = data$prob + data = NULL + + # open chromatin + model.open = read.read.models(file.path("results", "test_1kb", + sprintf("peaks_rmsk_sampled_openchromatin_%dclass_model.mat", k)))$models + # nucleosomes + model.nucl = read.read.models(file.path("results", "test_1kb", + sprintf("peaks_rmsk_sampled_nucleosomes_%dclass_model.mat", k)))$models + + # plot classes + col = brewer.pal(3, "Set1") + # X11(width=26, height=12) + png(filename=file.path("results", "test_1kb", + sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), + units="in", res=720, width=18, height=12) + m = matrix(1:30, nrow=6, ncol=5, byrow=F) + layout(m) + # order from most to least probable class + ord = order(model.prob, decreasing=T) + ref.open = model.open[ord,, drop=F] + ref.nucl = model.nucl[ord,, drop=F] + ref.seq = model.seq[,,ord, drop=F] + prob = model.prob[ord] + class = c(1:nrow(ref.open))[ord] + for(i in 1:nrow(ref.open)) + { # plot logo + plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, + main=sprintf("class %d 
(p=%.2f)", class[i], prob[i])) + # x-axis + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) + axis(1, at=x.at, labels=x.lab) + # y-axis is [0,1] for min/max signal + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) + } + # inlets with center + # row_n = 1 # row counter + # col_n = 1 # column counter + # for(i in 1:nrow(ref.open)) + # { # plot logo center + # right = 0.5*col_n - 0.01 + # left = right - 0.2 + # bottom = 1-(row_n*(0.2))+0.05 + # top = bottom + 0.15 + # par(fig=c(left, right, bottom, top), new=T) + # idx = (391-1-20):(391+1+20) + # plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) + # # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + # lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) + # lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) + # # xaxis + # x.at = seq(1, length(idx), length.out = 3) + # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + # axis(1, at=x.at, labels=x.lab) + # # yaxis + # axis(2, at=y.at, labels=y.lab) + # row_n = row_n + 1 + # if(i %% 5 == 0) + # { col_n = col_n + 1 + # row_n = 1 + # } + # } + dev.off() +} + diff --git a/scripts/test/test.sh b/scripts/test/test.sh deleted file mode 100644 index c46b711..0000000 --- a/scripts/test/test.sh +++ /dev/null @@ -1,32 +0,0 @@ -# some paths -## directories -results_dir='results/test' -data_dir='results/10xgenomics_PBMC_5k' -## input -file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat" -file_mat_seq="test.mat" -file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" -## file with seeds -file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt' - -mkdir -p 
$results_dir -touch $file_seed - -# parameters -n_iter='20' -n_shift='21' -seeding='random' -n_core=1 - -# open chromatin and sequence -for k in 10 -do - seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) - file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_sequence_'$k'class_prob.mat4d' - file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat' - file_mod2=$results_dir/'ctcf_motifs_10e-6_sequence_'$k'class_model.mat' - echo "$file_prob $seed" >> $file_seed - bin/ChIPPartitioning --read $file_mat_open,$file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed --thread $n_core > $file_prob - bin/probToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 - bin/probToModel --read $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod2 -done diff --git a/scripts/test/test_1kb.sh b/scripts/test/test_1kb.sh new file mode 100755 index 0000000..3bc3541 --- /dev/null +++ b/scripts/test/test_1kb.sh @@ -0,0 +1,35 @@ + +# paths +## dir +data_dir="results/10xgenomics_PBMC_5k" +results_dir="results/test_1kb" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_open_bin1bp_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_nucleosomes_bin1bp_1kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sequences_1kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='951' +n_core=12 + +# classify +for k in 10 20 30 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sequences_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_openchromatin_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_nucleosomes_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sequences_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq 
--class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done diff --git a/scripts/test/test_1kb_pwms.sh b/scripts/test/test_1kb_pwms.sh new file mode 100755 index 0000000..f7c51f8 --- /dev/null +++ b/scripts/test/test_1kb_pwms.sh @@ -0,0 +1,49 @@ + +# paths +## dir +pwm_dir="data/pwm/jaspar_2018_clustering/" +data_dir="results/10xgenomics_PBMC_5k" +results_dir="results/test_1kb_pwms" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_open_bin1bp_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_nucleosomes_1kb_bin1bp_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sequences_1kb.mat' +## PWM files +jun="$pwm_dir/cluster_3_node_23_20_motifs_prob.mat" +hif1a="$pwm_dir/cluster_4_node_31_3_motifs_prob.mat" +myc="$pwm_dir/cluster_4_node_22_4_motifs_prob.mat" +pu1="$pwm_dir/cluster_7_node_13_2_motifs_prob.mat" +cebpb="$pwm_dir/cluster_5_node_20_5_motifs_prob.mat" +irf4="$pwm_dir/cluster_31_node_4_5_motifs_prob.mat" +irf2="$pwm_dir/cluster_31_node_5_2_motifs_prob.mat" +lhx3="$pwm_dir/cluster_1_node_74_2_motifs_prob.mat" +foxh1="$pwm_dir/cluster_66_1_motifs_prob.mat" +sox3="$pwm_dir/cluster_33_node_1_2_motifs_prob.mat" +mef2c="$pwm_dir/cluster_20_4_motifs_prob.mat" +elf5="$pwm_dir/cluster_7_node_17_5_motifs_prob.mat" +stat6="$pwm_dir/cluster_32_node_STAT6_1_motifs_prob.mat" +nfe2="$pwm_dir/cluster_3_node_24_4_motifs_prob.mat" +ahr="$pwm_dir/cluster_4_node_30_2_motifs_prob.mat" +elf2="$pwm_dir/cluster_39_node_1_2_motifs_prob.mat" +ctcf="$pwm_dir/cluster_48_node_ctcf_1_motifs_prob.mat" + +mkdir -p $results_dir + +# EM param +n_iter='100' +n_shift='951' +n_core=12 + +# classify +## results files 
+file_prob=$results_dir/'peaks_rmsk_sequences_1kb_15class_prob.mat4d' +file_mod1=$results_dir/'peaks_rmsk_openchromatin_1kb_15class_model.mat' +file_mod2=$results_dir/'peaks_rmsk_nucleosomes_1kb_15class_model.mat' +file_mod3=$results_dir/'peaks_rmsk_sequences_1kb_15class_model.mat' + +bin/EMSequence --seq $file_mat_seq --motifs $jun,$hif1a,$myc,$pu1,$cebpb,$irf4,$irf2,$lhx3,$foxh1,$sox3,$mef2c,$elf5,$nfe2,$ahr,$elf2 --shift $n_shift --flip --iter $n_iter --thread $n_core > $file_prob + +bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 +bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 +bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + diff --git a/scripts/test/test_1kb_sampled.sh b/scripts/test/test_1kb_sampled.sh new file mode 100755 index 0000000..edd22fe --- /dev/null +++ b/scripts/test/test_1kb_sampled.sh @@ -0,0 +1,35 @@ + +# paths +## dir +data_dir="results/10xgenomics_PBMC_5k" +results_dir="results/test_1kb" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_bin1bp_1kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='951' +n_core=12 + +# classify +for k in 10 20 30 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift 
--flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done diff --git a/scripts/test/test_1kb_sampled_pwms.sh b/scripts/test/test_1kb_sampled_pwms.sh new file mode 100755 index 0000000..454cae9 --- /dev/null +++ b/scripts/test/test_1kb_sampled_pwms.sh @@ -0,0 +1,49 @@ + +# paths +## dir +pwm_dir="data/pwm/jaspar_2018_clustering/" +data_dir="results/10xgenomics_PBMC_5k" +results_dir="results/test_1kb_pwms" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_1kb_bin1bp_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat' +## PWM files +jun="$pwm_dir/cluster_3_node_23_20_motifs_prob.mat" +hif1a="$pwm_dir/cluster_4_node_31_3_motifs_prob.mat" +myc="$pwm_dir/cluster_4_node_22_4_motifs_prob.mat" +pu1="$pwm_dir/cluster_7_node_13_2_motifs_prob.mat" +cebpb="$pwm_dir/cluster_5_node_20_5_motifs_prob.mat" +irf4="$pwm_dir/cluster_31_node_4_5_motifs_prob.mat" +irf2="$pwm_dir/cluster_31_node_5_2_motifs_prob.mat" +lhx3="$pwm_dir/cluster_1_node_74_2_motifs_prob.mat" +foxh1="$pwm_dir/cluster_66_1_motifs_prob.mat" +sox3="$pwm_dir/cluster_33_node_1_2_motifs_prob.mat" +mef2c="$pwm_dir/cluster_20_4_motifs_prob.mat" +elf5="$pwm_dir/cluster_7_node_17_5_motifs_prob.mat" +# stat6="$pwm_dir/cluster_32_node_STAT6_1_motifs_prob.mat" +nfe2="$pwm_dir/cluster_3_node_24_4_motifs_prob.mat" +ahr="$pwm_dir/cluster_4_node_30_2_motifs_prob.mat" +elf2="$pwm_dir/cluster_39_node_1_2_motifs_prob.mat" +# ctcf="$pwm_dir/cluster_48_node_ctcf_1_motifs_prob.mat" + +mkdir -p $results_dir + +# EM param +n_iter='100' +n_shift='951' +n_core=12 + +# classify +## results files 
+file_prob=$results_dir/'peaks_rmsk_sampled_sequences_1kb_15class_prob.mat4d' +file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_1kb_15class_model.mat' +file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_1kb_15class_model.mat' +file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_1kb_15class_model.mat' + +bin/EMSequence --seq $file_mat_seq --motifs $jun,$hif1a,$myc,$pu1,$cebpb,$irf4,$irf2,$lhx3,$foxh1,$sox3,$mef2c,$elf5,$nfe2,$ahr,$elf2 --shift $n_shift --flip --iter $n_iter --thread $n_core > $file_prob + +bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 +bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 +bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 + diff --git a/scripts/test/test_2kb.sh b/scripts/test/test_2kb.sh new file mode 100755 index 0000000..dbabf08 --- /dev/null +++ b/scripts/test/test_2kb.sh @@ -0,0 +1,36 @@ + +# paths +## dir +data_dir="results/10xgenomics_PBMC_5k" +results_dir="results/test_2kb" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_open_bin1bp_2kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_nucleosomes_bin1bp_2kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sequences_2kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='201' +n_core=12 + +# classify +for k in 10 20 30 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sequences_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_openchromatin_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_nucleosomes_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sequences_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > 
$file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done + diff --git a/scripts/test/test_2kb_sampled.sh b/scripts/test/test_2kb_sampled.sh new file mode 100755 index 0000000..2f3e309 --- /dev/null +++ b/scripts/test/test_2kb_sampled.sh @@ -0,0 +1,36 @@ + +# paths +## dir +data_dir="results/10xgenomics_PBMC_5k" +results_dir="results/test_2kb" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_2kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_bin1bp_2kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_2kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='201' +n_core=12 + +# classify +for k in 10 20 30 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done + diff --git a/scripts/test_2/analysis_test_2_sampled.R b/scripts/test_2/analysis_test_2_sampled.R new file mode 100644 index 
0000000..afcf023 --- /dev/null +++ b/scripts/test_2/analysis_test_2_sampled.R @@ -0,0 +1,97 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) +library(seqLogo) + +# functions +source(file.path("scripts", "functions.R")) + +# the number of classes searched +n.classes = c(10, 20, 30) + +# path to the images for the logo +path.a = file.path("res/A.png") +path.c = file.path("res/C.png") +path.g = file.path("res/G.png") +path.t = file.path("res/T.png") + +################## sequence patterns around ctcf motifs ################## + +for(k in n.classes) +{ + # sequence + data = read.sequence.models(file.path("results", "test_1kb", + sprintf("peaks_rmsk_sampled_sequences_%dclass_model.mat", k))) + model.seq = data$models + model.prob = data$prob + data = NULL + + # open chromatin + model.open = read.read.models(file.path("results", "test_1kb", + sprintf("peaks_rmsk_sampled_openchromatin_%dclass_model.mat", k)))$models + # nucleosomes + model.nucl = read.read.models(file.path("results", "test_1kb", + sprintf("peaks_rmsk_sampled_nucleosomes_%dclass_model.mat", k)))$models + + # plot classes + col = brewer.pal(3, "Set1") + # X11(width=26, height=12) + png(filename=file.path("results", "test_1kb", + sprintf("peaks_rmsk_sampled_sequences_%dclass.png", k)), + units="in", res=720, width=18, height=12) + m = matrix(1:30, nrow=6, ncol=5, byrow=F) + layout(m) + # order from most to least probable class + ord = order(model.prob, decreasing=T) + ref.open = model.open[ord,, drop=F] + ref.nucl = model.nucl[ord,, drop=F] + ref.seq = model.seq[,,ord, drop=F] + prob = model.prob[ord] + class = c(1:nrow(ref.open))[ord] + for(i in 1:nrow(ref.open)) + { # plot logo + plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, + main=sprintf("class %d (p=%.2f)", class[i], prob[i])) + # x-axis + x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2, length.out=3) + x.at = seq(1, ncol(ref.open), length.out=length(x.lab)) + axis(1, at=x.at, 
labels=x.lab) + # y-axis is [0,1] for min/max signal + y.at = seq(0, 2, length.out=2) + y.lab = c("min", "max") + axis(2, at=y.at, labels=y.lab) + # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1]) + lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2]) + } + # inlets with center + # row_n = 1 # row counter + # col_n = 1 # column counter + # for(i in 1:nrow(ref.open)) + # { # plot logo center + # right = 0.5*col_n - 0.01 + # left = right - 0.2 + # bottom = 1-(row_n*(0.2))+0.05 + # top = bottom + 0.15 + # par(fig=c(left, right, bottom, top), new=T) + # idx = (391-1-20):(391+1+20) + # plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) + # # plot signal (multiplies by 2 because the y-axis goes to 2 bits) + # lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1]) + # lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2]) + # # xaxis + # x.at = seq(1, length(idx), length.out = 3) + # x.lab = seq(-(ncol(ref.open)-1)/2, (ncol(ref.open)-1)/2)[idx][x.at] + # axis(1, at=x.at, labels=x.lab) + # # yaxis + # axis(2, at=y.at, labels=y.lab) + # row_n = row_n + 1 + # if(i %% 5 == 0) + # { col_n = col_n + 1 + # row_n = 1 + # } + # } + dev.off() +} + diff --git a/scripts/test_2/test_1kb_sampled.sh b/scripts/test_2/test_1kb_sampled.sh new file mode 100755 index 0000000..2b9e096 --- /dev/null +++ b/scripts/test_2/test_1kb_sampled.sh @@ -0,0 +1,35 @@ + +# paths +## dir +data_dir="results/10xgenomics_PBMC_5k" +results_dir="results/test_1kb_2" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_1kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_bin1bp_1kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_1kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='971' +n_core=14 + +# classify +for 
k in 10 20 30 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head -c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done diff --git a/scripts/test_2/test_2kb_sampled.sh b/scripts/test_2/test_2kb_sampled.sh new file mode 100755 index 0000000..02af0c9 --- /dev/null +++ b/scripts/test_2/test_2kb_sampled.sh @@ -0,0 +1,36 @@ + +# paths +## dir +data_dir="results/10xgenomics_PBMC_5k" +results_dir="results/test_2kb_2" +## matrix files +file_mat_open=$data_dir/'peaks_rmsk_sampled_open_bin1bp_2kb_read_atac.mat' +file_mat_nucl=$data_dir/'peaks_rmsk_sampled_nucleosomes_bin1bp_2kb_fragment_center.mat' +file_mat_seq=$data_dir/'peaks_rmsk_sampled_sequences_2kb.mat' +## file with seeds +file_seed=$results_dir'/peaks_rmsk_sampled_seed.txt' + +mkdir -p $results_dir +touch $file_seed + +# EM param +n_iter='100' +n_shift='971' +n_core=14 + +# classify +for k in 10 20 30 +do + ## results files + file_prob=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_prob.mat4d' + file_mod1=$results_dir/'peaks_rmsk_sampled_openchromatin_'$k'class_model.mat' + file_mod2=$results_dir/'peaks_rmsk_sampled_nucleosomes_'$k'class_model.mat' + file_mod3=$results_dir/'peaks_rmsk_sampled_sequences_'$k'class_model.mat' + seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 | head 
-c${1:-15};echo) + echo "$file_prob $seed" >> $file_seed + bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core > $file_prob + bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1 + bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod2 + bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3 +done + diff --git a/scripts/test_dendrogram.R b/scripts/test_dendrogram.R new file mode 100644 index 0000000..708c704 --- /dev/null +++ b/scripts/test_dendrogram.R @@ -0,0 +1,105 @@ +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +# libraries +library(RColorBrewer) +library(motifStack) +library(TFBSTools) +library(MotifDb) + +# functions +source(file.path("scripts", "functions.R")) + +get_pfm_list = function(motifs, prefix_name) +{ pfm_list = list() + + for(i in 1:dim(motifs)[3]) + { pfm_list[[i]] = new("pfm", + mat=motifs[,,i], + name=sprintf("%s class %d", prefix_name, i)) + } + return(pfm_list) +} + + +# number of classes searched in the data +n_classes = c(17, 20, 30) + +# load motifs from JASPAR clustering used to initialise the classes +motifs_jaspar_paths = c("data/pwm/jaspar_2018_clustering/cluster_3_node_23_20_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_4_node_31_3_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_4_node_22_4_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_7_node_13_2_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_5_node_20_5_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_31_node_4_5_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_31_node_5_2_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_1_node_74_2_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_66_1_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_33_node_1_2_motifs_prob.mat", + 
"data/pwm/jaspar_2018_clustering/cluster_20_4_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_7_node_17_5_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_32_node_STAT6_1_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_3_node_24_4_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_4_node_30_2_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_39_node_1_2_motifs_prob.mat", + "data/pwm/jaspar_2018_clustering/cluster_48_node_ctcf_1_motifs_prob.mat") +motifs_jaspar = lapply(motifs_jaspar_paths, read.table) +motifs_jaspar = lapply(motifs_jaspar, as.matrix) +motifs_jaspar_names = c("jun", + "HIF1A", + "myc", + "PU.1", + "CEBPb", + "Irf4", + "Irf2", + "LHX3", + "Fox1H", + "Sox3", + "Mef2c", + "Elf5", + "STAT6", + "NFE2", + "AHR", + "E2F2", + "CTCF") +tmp = list() +for(i in 1:length(motifs_jaspar)) +{ rownames(motifs_jaspar[[i]]) = c("A", "C", "G", "T") + tmp[[i]] = new("pfm", + mat=motifs_jaspar[[i]], + name=motifs_jaspar_names[i]) +} +motifs_jaspar = tmp +rm(tmp) + +for(n_class in n_classes) +{ # load classes found + motifs_found = get_pfm_list(read.sequence.models(file.path("results", + "10xgenomics_PBMC_5k_peaks_classification_4", + sprintf("peaks_rmsk_sampled_sequences_1kb_%dclass_model.mat", n_class)))$models, + "") + # colors + red = brewer.pal(3, "Set1")[1] + blue = brewer.pal(3, "Set1")[2] + color = c(rep(blue, length(motifs_jaspar)), + rep(red, length(motifs_found))) + # plot logo stack with radial style + # X11(height=12, width=12) + png(filename=file.path(sprintf("test_%dclass.png", n_class)), + units="in", res=720, width=14, height=14) + motifStack(c(motifs_jaspar, + motifs_found), + layout="radialPhylog", + circle=0.3, + cleaves = 0.2, + clabel.leaves = 0.5, + col.bg=color, + col.bg.alpha=0.3, + col.leaves=color, + col.inner.label.circle=color, + inner.label.circle.width=0.05, + col.outer.label.circle=color, + outer.label.circle.width=0.02, + circle.motif=.5, + angle=350) + dev.off() +} diff --git 
a/src/Applications/ChIPPartitioningApplication.cpp b/src/Applications/ChIPPartitioningApplication.cpp index 943afc7..fabdad0 100644 --- a/src/Applications/ChIPPartitioningApplication.cpp +++ b/src/Applications/ChIPPartitioningApplication.cpp @@ -1,174 +1,176 @@ #include #include #include #include #include // std::invalid_argument #include #include // +#include + namespace po = boost::program_options ; ChIPPartitioningApplication::ChIPPartitioningApplication(int argn, char** argv) : file_read(""), file_sequence(""), n_class(0), n_iter(0), n_shift(0), flip(false), n_threads(0), seeding(EMEngine::seeding_codes::RANDOM), seed(""), runnable(true) { // parse command line options and set the fields this->parseOptions(argn, argv) ; } int ChIPPartitioningApplication::run() { if(this->runnable) { // read data std::vector read_paths ; boost::split(read_paths, this->file_read, [](char c){return c == ',';}); - std::vector data_read ; + std::vector> data_read ; for(const auto& path : read_paths) { if(path == "") { continue ; } - data_read.push_back(read_matrix2d_i(path)) ; + data_read.push_back(Matrix2D(path)) ; } // sequence data - std::vector data_seq ; + std::vector> data_seq ; if(this->file_sequence != "") - { data_seq.push_back(read_matrix2d_i(this->file_sequence)) ; } + { data_seq.push_back(Matrix2D(this->file_sequence)) ; } EMEngine em(data_read, data_seq, this->n_class, this->n_iter, this->n_shift, this->flip, this->seeding, this->seed, this->n_threads) ; em.classify() ; std::cout << em.get_post_prob() << std::endl ; return EXIT_SUCCESS ; } else { return EXIT_FAILURE ; } } void ChIPPartitioningApplication::parseOptions(int argn, char** argv) { // no option to parse if(argv == nullptr) { std::string message = "no options to parse!" 
; throw std::invalid_argument(message) ; } // help messages std::string desc_msg = "\n" "ChIPPartitioning is a probabilistic partitioning algorithm that \n" "sofetly assigns genomic regions to classes given their shape \n" "of the signal over the region. The assignment probabilities \n" "are returned through stdout.\n\n" ; std::string opt_help_msg = "Produces this help message." ; std::string opt_thread_msg = "The number of threads dedicated to parallelize the computations,\n " "by default 0 (no parallelization)." ; std::string opt_read_msg = "A coma separated list of paths to the file containing the " "read density data" ; std::string opt_seq_msg = "The path to the file containing the sequence data" ; std::string opt_iter_msg = "The number of iterations." ; std::string opt_class_msg = "The number of classes to find." ; std::string opt_shift_msg = "Enables this number of column of shifting " "freedom. By default, shifting is " "disabled (equivalent to --shift 1)." ; std::string opt_flip_msg = "Enables flipping."; std::string opt_seeding_msg = "Specify which method should be used to initialise the " "cluster references." 
; std::string opt_seed_msg = "A value to seed the random number generator."; // option parser boost::program_options::variables_map vm ; boost::program_options::options_description desc(desc_msg) ; std::string seeding_tmp ; desc.add_options() ("help,h", opt_help_msg.c_str()) ("read", po::value(&(this->file_read)), opt_read_msg.c_str()) ("seq", po::value(&(this->file_sequence)), opt_read_msg.c_str()) ("iter,i", po::value(&(this->n_iter)), opt_iter_msg.c_str()) ("class,c", po::value(&(this->n_class)), opt_class_msg.c_str()) ("shift,s", po::value(&(this->n_shift)), opt_shift_msg.c_str()) ("flip", opt_flip_msg.c_str()) ("seeding", po::value(&(seeding_tmp)), opt_seeding_msg.c_str()) ("seed", po::value(&(this->seed)), opt_seed_msg.c_str()) ("thread", po::value(&(this->n_threads)), opt_thread_msg.c_str()) ; // parse try { po::store(po::parse_command_line(argn, argv, desc), vm) ; po::notify(vm) ; } catch(std::invalid_argument& e) { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; throw std::invalid_argument(msg) ; } catch(...) { throw std::invalid_argument("An unknown error occured while parsing the options") ; } bool help = vm.count("help") ; // checks unproper option settings if(this->file_read == "" and this->file_sequence == "" and (not help)) { std::string msg("Error! No data were given (--read and/or --seq)!") ; throw std::invalid_argument(msg) ; } else if((seeding_tmp != "random") and (seeding_tmp != "sampling") and (seeding_tmp != "toy") and (not help)) { std::string msg("Error! 
Unrecognized seeding method (--seeding)!") ; throw std::invalid_argument(msg) ; } // no class given -> 1 iter if(this->n_iter == 0) { this->n_iter = 1 ; } // no shift class given -> 1 class if(this->n_class == 0) { this->n_class = 1 ; } // no shift given, value of 1 -> no shift if(this->n_shift == 0) { this->n_shift = 1 ; } // set seeding if(seeding_tmp == "random") { this->seeding = EMEngine::seeding_codes::RANDOM ; } else if(seeding_tmp == "sampling") { this->seeding = EMEngine::seeding_codes::SAMPLING ; } else if(seeding_tmp == "toy") { this->seeding = EMEngine::seeding_codes::TOY ; } // set flip if(vm.count("flip")) { this->flip = true ; } // help invoked, run() cannot be invoked if(help) { std::cout << desc << std::endl ; this->runnable = false ; return ; } // everything fine, run() can be called else { this->runnable = true ; return ; } } int main(int argn, char** argv) { ChIPPartitioningApplication app(argn, argv) ; return app.run() ; } diff --git a/src/Applications/ChIPPartitioningApplication.hpp b/src/Applications/ChIPPartitioningApplication.hpp index 912b9aa..2b281d2 100644 --- a/src/Applications/ChIPPartitioningApplication.hpp +++ b/src/Applications/ChIPPartitioningApplication.hpp @@ -1,106 +1,105 @@ #ifndef CHIPPPARTITIONINGAPPLICATION_HPP #define CHIPPPARTITIONINGAPPLICATION_HPP #include #include -#include #include /*! * \brief The ChIPPartitioningApplication class is a wrapper around an EMEngine * instance creating an autonomous application to classify data by directly * passing all the options and parameters from the command line. */ class ChIPPartitioningApplication: public ApplicationInterface { public: ChIPPartitioningApplication() = delete ; ChIPPartitioningApplication(const ChIPPartitioningApplication& app) = delete ; /*! * \brief Constructs an object from the command line * options. * \param argn the number of options passed to the * main() function. * \param argv the vector of options passed to the * main() function. 
*/ ChIPPartitioningApplication(int argn, char** argv) ; /*! * \brief Runs the application. The data are classified * using the given settings and the posterior probability * matrix is returned through the stdout. * The matrix is a 4D matrix with dimensions : * regions, class, shift flip. * \return an exit code EXIT_SUCCESS or EXIT_FAILURE * to return to the OS. */ virtual int run() override ; private: /*! * \brief Parses the program command line options and * sets the object field accordingly. * If the help option is detected, the "runnable" * field is set to false and subsequent calls to * run() will produce nothing. * \param argn the number of options passed to the * main() function. * \param argv the vector of options passed to the * main() function. * \throw std::invalid_argument if an error is found * in the program options. */ void parseOptions(int argn, char** argv) ; /*! * \brief the paths to the files containing the read * density data. */ std::string file_read ; /*! * \brief the path to the file containing the * sequence data. */ std::string file_sequence ; /*! * \brief the number of classes to partition the data into. */ size_t n_class ; /*! * \brief the number of iterations allowed. */ size_t n_iter ; /*! * \brief the shifting freedom. */ size_t n_shift ; /*! * \brief whether flipping freedom is allowed. */ bool flip ; /*! * \brief the number of threads. */ size_t n_threads ; /*! * \brief the seeding method to use. */ EMEngine::seeding_codes seeding ; /*! * \brief a seed to initialise the random number generator. */ std::string seed ; /*! * \brief a flag indicating whether the core of run() can be * run or not. 
*/ bool runnable ; } ; #endif // CHIPPPARTITIONINGAPPLICATION_HPP diff --git a/src/Applications/CorrelationMatrixCreatorApplication.cpp b/src/Applications/CorrelationMatrixCreatorApplication.cpp index 3bc1842..4fe08f4 100644 --- a/src/Applications/CorrelationMatrixCreatorApplication.cpp +++ b/src/Applications/CorrelationMatrixCreatorApplication.cpp @@ -1,190 +1,190 @@ #include #include #include #include #include #include // std::invalid_argument namespace po = boost::program_options ; // the valid values for --method option std::string method_read = "read" ; std::string method_read_atac = "read_atac" ; std::string method_fragment = "fragment" ; std::string method_fragment_center = "fragment_center" ; CorrelationMatrixCreatorApplication::CorrelationMatrixCreatorApplication(int argn, char** argv) - : file_bed(""), file_bam(""), from(0), to(0), bin_size(0), + : file_bed(""), file_bam(""), file_bai(""), from(0), to(0), bin_size(0), method(CorrelationMatrixCreator::FRAGMENT), runnable(true) { // parse command line options and set the fields this->parseOptions(argn, argv) ; } int CorrelationMatrixCreatorApplication::run() { if(this->runnable) { CorrelationMatrixCreator mc(this->file_bed, this->file_bam, this->file_bai, this->from, this->to, this->bin_size, this->method) ; std::cout << mc.create_matrix() << std::endl ; return EXIT_SUCCESS ; } else { return EXIT_FAILURE ; } } void CorrelationMatrixCreatorApplication::parseOptions(int argn, char** argv) { // no option to parse if(argv == nullptr) { std::string message = "no options to parse!" ; throw std::invalid_argument(message) ; } // help messages std::string desc_msg = "\n" "CorrelationMatrixCreator is an application that creates a\n" "count matrix from a BED file and a BAM file and returnes it\n" "through stdout.\n" "The matrix contains one row per region (reference region)\n" "present in the BED file. 
For each region, its center is\n" "computed and a set of equally sized, non-overlapping bins,\n" "centered on the region center and covering the interval [from,to]\n" "is build. Then, each bin is assigned the number of read/fragment\n" "positions (targets) present in the BAM file that are mapped at\n" "that position.\n" "The matrix is a 2D matrix which dimensions are :\n" "1) number of regions\n" "2) length of region (to - from + 1) / bin_size\n\n" ; std::string opt_help_msg = "Produces this help message." ; std::string opt_bed_msg = "The path to the BED file containing the references."; std::string opt_bam_msg = "The path to the BAM file containing the targets."; - std::string opt_bai_msg = "The path to the BAI file containing the index BAM file."; - std::string opt_from_msg = "The upstream limit - in relative coordinate - of the region to build " + std::string opt_bai_msg = "The path to the BAI file containing the BAM file index."; + std::string opt_from_msg = "The upstream limit - in relative coordinate - of the region to build\n" "around each reference center." ; - std::string opt_to_msg = "The downstream limit - in relative coordinate - of the region to build " + std::string opt_to_msg = "The downstream limit - in relative coordinate - of the region to build\n" "around each reference center." ; std::string opt_binsize_msg = "The size of the bins." 
; char tmp[4096] ; sprintf(tmp, - "How the data in the BAM file should be handled when computing " + "How the data in the BAM file should be handled when computing\n" "the number of counts in each bin.\n" "\t\"%s\" uses each position within the reads (by default)\n" "\t\"%s\" uses only the insertion site for ATAC-seq data\n" "\t\"%s\" uses each position within the fragments\n" "\t\"%s\" uses only the fragment central positions\n", method_read.c_str(), method_read_atac.c_str(), method_fragment.c_str(), method_fragment_center.c_str()) ; std::string opt_method_msg = tmp ; // option parser boost::program_options::variables_map vm ; boost::program_options::options_description desc(desc_msg) ; std::string method(method_read) ; desc.add_options() ("help,h", opt_help_msg.c_str()) ("bed", po::value(&(this->file_bed)), opt_bed_msg.c_str()) ("bam", po::value(&(this->file_bam)), opt_bam_msg.c_str()) ("bai", po::value(&(this->file_bai)), opt_bai_msg.c_str()) ("from", po::value(&(this->from)), opt_from_msg.c_str()) ("to", po::value(&(this->to)), opt_to_msg.c_str()) ("binSize", po::value(&(this->bin_size)), opt_binsize_msg.c_str()) ("method", po::value(&(method)), opt_method_msg.c_str()) ; // parse try { po::store(po::parse_command_line(argn, argv, desc), vm) ; po::notify(vm) ; } catch(std::invalid_argument& e) { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; throw std::invalid_argument(msg) ; } catch(...) { throw std::invalid_argument("An unknown error occured while parsing the options") ; } bool help = vm.count("help") ; // checks unproper option settings if(this->file_bed == "" and (not help)) { std::string msg("Error! No BED file was given (--bed)!") ; throw std::invalid_argument(msg) ; } else if(this->file_bam == "" and (not help)) { std::string msg("Error! No BAM file was given (--bam)!") ; throw std::invalid_argument(msg) ; } else if(this->file_bai == "" and (not help)) { std::string msg("Error! 
No BAM index file was given (--bai)!") ; throw std::invalid_argument(msg) ; } else if(this->from == 0 and this->to == 0 and (not help)) { std::string msg("Error! No range given (--from and --to)!") ; throw std::invalid_argument(msg) ; } else if(this->from >= this->to and (not help)) { std::string msg("Error! from shoud be smaller than to (--from and --to)!") ; throw std::invalid_argument(msg) ; } else if(this->bin_size <= 0 and (not help)) { std::string msg("Error! bin size should be bigger than 0 (--binSize)!") ; throw std::invalid_argument(msg) ; } else if(method != method_read and method != method_read_atac and method != method_fragment and method != method_fragment_center) { char msg[4096] ; sprintf(msg, "Error! method should be %s, %s, %s or %s (--method)", method_read.c_str(), method_read_atac.c_str(), method_fragment.c_str(), method_fragment_center.c_str()) ; throw std::invalid_argument(msg) ; } // set method if(method == method_read) { this->method = CorrelationMatrixCreator::READ ; } else if(method == method_read_atac) { this->method = CorrelationMatrixCreator::READ_ATAC ; } else if(method == method_fragment) { this->method = CorrelationMatrixCreator::FRAGMENT ; } else if(method == method_fragment_center) { this->method = CorrelationMatrixCreator::FRAGMENT_CENTER ; } // help invoked, run() cannot be invoked if(help) { std::cout << desc << std::endl ; this->runnable = false ; return ; } // everything fine, run() can be called else { this->runnable = true ; return ; } } int main(int argn, char** argv) { CorrelationMatrixCreatorApplication app(argn, argv) ; return app.run() ; } diff --git a/src/Applications/ChIPPartitioningApplication.cpp b/src/Applications/EMJointApplication.cpp similarity index 59% copy from src/Applications/ChIPPartitioningApplication.cpp copy to src/Applications/EMJointApplication.cpp index 943afc7..b188fec 100644 --- a/src/Applications/ChIPPartitioningApplication.cpp +++ b/src/Applications/EMJointApplication.cpp @@ -1,174 +1,174 @@ 
-#include -#include +#include +#include #include #include #include // std::invalid_argument #include -#include // +#include // boost::split() + +#include namespace po = boost::program_options ; -ChIPPartitioningApplication::ChIPPartitioningApplication(int argn, char** argv) - : file_read(""), file_sequence(""), n_class(0), n_iter(0), n_shift(0), flip(false), - n_threads(0), seeding(EMEngine::seeding_codes::RANDOM), - seed(""), runnable(true) +EMJointApplication::EMJointApplication(int argn, char** argv) + : files_read(""), file_sequence(""), n_class(0), n_iter(0), n_shift(0), flip(false), + n_threads(0), seed(""), runnable(true) { // parse command line options and set the fields this->parseOptions(argn, argv) ; } -int ChIPPartitioningApplication::run() +int EMJointApplication::run() { if(this->runnable) { // read data std::vector read_paths ; - boost::split(read_paths, this->file_read, [](char c){return c == ',';}); - std::vector data_read ; + boost::split(read_paths, this->files_read, [](char c){return c == ',';}) ; + std::vector> data_read ; for(const auto& path : read_paths) { if(path == "") { continue ; } - data_read.push_back(read_matrix2d_i(path)) ; + data_read.push_back(Matrix2D(path)) ; } // sequence data - std::vector data_seq ; - if(this->file_sequence != "") - { data_seq.push_back(read_matrix2d_i(this->file_sequence)) ; } - - EMEngine em(data_read, - data_seq, - this->n_class, - this->n_iter, - this->n_shift, - this->flip, - this->seeding, - this->seed, - this->n_threads) ; - em.classify() ; - std::cout << em.get_post_prob() << std::endl ; + EMJoint* em = nullptr ; + if(this->file_sequence == "") + { em = new EMJoint(data_read, + this->n_class, + this->n_iter, + this->n_shift, + this->flip, + this->seed, + this->n_threads) ; + } + else + { Matrix2D data_seq(this->file_sequence) ; + em = new EMJoint(data_read, + data_seq, + this->n_class, + this->n_iter, + this->n_shift, + this->flip, + this->seed, + this->n_threads) ; + } + em->classify() ; + std::cout 
<< em->get_post_prob() << std::endl ; + delete em ; + em = nullptr ; return EXIT_SUCCESS ; } else { return EXIT_FAILURE ; } } -void ChIPPartitioningApplication::parseOptions(int argn, char** argv) +void EMJointApplication::parseOptions(int argn, char** argv) { // no option to parse if(argv == nullptr) { std::string message = "no options to parse!" ; throw std::invalid_argument(message) ; } // help messages std::string desc_msg = "\n" - "ChIPPartitioning is a probabilistic partitioning algorithm that \n" - "sofetly assigns genomic regions to classes given their shape \n" - "of the signal over the region. The assignment probabilities \n" - "are returned through stdout.\n\n" ; + "EMJoint is a probabilistic partitioning algorithm that \n" + "sofetly assigns genomic regions to classes given 1) the shapes \n" + "of the read densities over the regions and 2) the region sequence \n" + "motif contents. \n " + "The assignment probabilitiesare returned through stdout.\n\n" ; std::string opt_help_msg = "Produces this help message." ; - std::string opt_thread_msg = "The number of threads dedicated to parallelize the computations,\n " + std::string opt_thread_msg = "The number of threads dedicated to parallelize the computations, \n" "by default 0 (no parallelization)." ; - std::string opt_read_msg = "A coma separated list of paths to the file containing the " - "read density data" ; - std::string opt_seq_msg = "The path to the file containing the sequence data" ; + std::string opt_read_msg = "A coma separated list of paths to the file containing the \n" + "read density data. At least one path is needed." ; + std::string opt_seq_msg = "The path to the file containing the sequence data. If no path is \n" + "given, the classification is only cares about the read density \n" + "shapes." ; std::string opt_iter_msg = "The number of iterations." ; std::string opt_class_msg = "The number of classes to find." 
; std::string opt_shift_msg = "Enables this number of column of shifting " "freedom. By default, shifting is " "disabled (equivalent to --shift 1)." ; std::string opt_flip_msg = "Enables flipping."; - std::string opt_seeding_msg = "Specify which method should be used to initialise the " - "cluster references." ; std::string opt_seed_msg = "A value to seed the random number generator."; // option parser boost::program_options::variables_map vm ; boost::program_options::options_description desc(desc_msg) ; - std::string seeding_tmp ; - desc.add_options() ("help,h", opt_help_msg.c_str()) - ("read", po::value(&(this->file_read)), opt_read_msg.c_str()) + ("read", po::value(&(this->files_read)), opt_read_msg.c_str()) ("seq", po::value(&(this->file_sequence)), opt_read_msg.c_str()) ("iter,i", po::value(&(this->n_iter)), opt_iter_msg.c_str()) ("class,c", po::value(&(this->n_class)), opt_class_msg.c_str()) ("shift,s", po::value(&(this->n_shift)), opt_shift_msg.c_str()) ("flip", opt_flip_msg.c_str()) - ("seeding", po::value(&(seeding_tmp)), opt_seeding_msg.c_str()) ("seed", po::value(&(this->seed)), opt_seed_msg.c_str()) ("thread", po::value(&(this->n_threads)), opt_thread_msg.c_str()) ; // parse try { po::store(po::parse_command_line(argn, argv, desc), vm) ; po::notify(vm) ; } catch(std::invalid_argument& e) { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; throw std::invalid_argument(msg) ; } catch(...) { throw std::invalid_argument("An unknown error occured while parsing the options") ; } bool help = vm.count("help") ; // checks unproper option settings - if(this->file_read == "" and + if(this->files_read == "" and this->file_sequence == "" and (not help)) - { std::string msg("Error! No data were given (--read and/or --seq)!") ; + { std::string msg("Error! 
No data were given (--read and --seq)!") ; throw std::invalid_argument(msg) ; } - else if((seeding_tmp != "random") and - (seeding_tmp != "sampling") and - (seeding_tmp != "toy") and - (not help)) - { std::string msg("Error! Unrecognized seeding method (--seeding)!") ; + if(this->files_read == "" and + (not help)) + { std::string msg("Error! No read density data were given (--read)!") ; throw std::invalid_argument(msg) ; } - // no class given -> 1 iter + // no iter given -> 1 iter if(this->n_iter == 0) { this->n_iter = 1 ; } // no shift class given -> 1 class if(this->n_class == 0) { this->n_class = 1 ; } // no shift given, value of 1 -> no shift if(this->n_shift == 0) { this->n_shift = 1 ; } - // set seeding - if(seeding_tmp == "random") - { this->seeding = EMEngine::seeding_codes::RANDOM ; } - else if(seeding_tmp == "sampling") - { this->seeding = EMEngine::seeding_codes::SAMPLING ; } - else if(seeding_tmp == "toy") - { this->seeding = EMEngine::seeding_codes::TOY ; } // set flip if(vm.count("flip")) { this->flip = true ; } // help invoked, run() cannot be invoked if(help) { std::cout << desc << std::endl ; this->runnable = false ; return ; } // everything fine, run() can be called else { this->runnable = true ; return ; } } int main(int argn, char** argv) -{ ChIPPartitioningApplication app(argn, argv) ; +{ EMJointApplication app(argn, argv) ; return app.run() ; } diff --git a/src/Applications/ChIPPartitioningApplication.hpp b/src/Applications/EMJointApplication.hpp similarity index 76% copy from src/Applications/ChIPPartitioningApplication.hpp copy to src/Applications/EMJointApplication.hpp index 912b9aa..4fa806c 100644 --- a/src/Applications/ChIPPartitioningApplication.hpp +++ b/src/Applications/EMJointApplication.hpp @@ -1,106 +1,101 @@ -#ifndef CHIPPPARTITIONINGAPPLICATION_HPP -#define CHIPPPARTITIONINGAPPLICATION_HPP +#ifndef EMJOINTAPPLICATION_HPP +#define EMJOINTAPPLICATION_HPP #include -#include -#include +#include #include /*! 
- * \brief The ChIPPartitioningApplication class is a wrapper around an EMEngine + * \brief The EMJointApplication class is a wrapper around an EMJoint * instance creating an autonomous application to classify data by directly * passing all the options and parameters from the command line. */ -class ChIPPartitioningApplication: public ApplicationInterface +class EMJointApplication: public ApplicationInterface { public: - ChIPPartitioningApplication() = delete ; - ChIPPartitioningApplication(const ChIPPartitioningApplication& app) = delete ; + EMJointApplication() = delete ; + EMJointApplication(const EMJointApplication& app) = delete ; /*! * \brief Constructs an object from the command line * options. * \param argn the number of options passed to the * main() function. * \param argv the vector of options passed to the * main() function. */ - ChIPPartitioningApplication(int argn, char** argv) ; + EMJointApplication(int argn, char** argv) ; /*! * \brief Runs the application. The data are classified * using the given settings and the posterior probability * matrix is returned through the stdout. * The matrix is a 4D matrix with dimensions : * regions, class, shift flip. * \return an exit code EXIT_SUCCESS or EXIT_FAILURE * to return to the OS. */ virtual int run() override ; private: /*! * \brief Parses the program command line options and * sets the object field accordingly. * If the help option is detected, the "runnable" * field is set to false and subsequent calls to * run() will produce nothing. * \param argn the number of options passed to the * main() function. * \param argv the vector of options passed to the * main() function. * \throw std::invalid_argument if an error is found * in the program options. */ void parseOptions(int argn, char** argv) ; /*! - * \brief the paths to the files containing the read - * density data. 
+ * \brief a coma separated list of paths to the files + * containing the read density data */ - std::string file_read ; + std::string files_read ; /*! * \brief the path to the file containing the * sequence data. */ std::string file_sequence ; /*! * \brief the number of classes to partition the data into. */ size_t n_class ; /*! * \brief the number of iterations allowed. */ size_t n_iter ; /*! * \brief the shifting freedom. */ size_t n_shift ; /*! * \brief whether flipping freedom is allowed. */ bool flip ; /*! * \brief the number of threads. */ size_t n_threads ; - /*! - * \brief the seeding method to use. - */ - EMEngine::seeding_codes seeding ; /*! * \brief a seed to initialise the random number generator. */ std::string seed ; /*! * \brief a flag indicating whether the core of run() can be * run or not. */ bool runnable ; } ; -#endif // CHIPPPARTITIONINGAPPLICATION_HPP +#endif // EMJOINTAPPLICATION_HPP diff --git a/src/Applications/ChIPPartitioningApplication.cpp b/src/Applications/EMReadApplication.cpp similarity index 52% copy from src/Applications/ChIPPartitioningApplication.cpp copy to src/Applications/EMReadApplication.cpp index 943afc7..5b8e842 100644 --- a/src/Applications/ChIPPartitioningApplication.cpp +++ b/src/Applications/EMReadApplication.cpp @@ -1,174 +1,136 @@ -#include -#include +#include +#include #include #include #include // std::invalid_argument #include -#include // + +#include namespace po = boost::program_options ; -ChIPPartitioningApplication::ChIPPartitioningApplication(int argn, char** argv) - : file_read(""), file_sequence(""), n_class(0), n_iter(0), n_shift(0), flip(false), - n_threads(0), seeding(EMEngine::seeding_codes::RANDOM), - seed(""), runnable(true) +EMReadApplication::EMReadApplication(int argn, char** argv) + : file_read(""), n_class(0), n_iter(0), n_shift(0), flip(false), + n_threads(0), seed(""), runnable(true) { // parse command line options and set the fields this->parseOptions(argn, argv) ; } -int 
ChIPPartitioningApplication::run() +int EMReadApplication::run() { if(this->runnable) - { // read data - std::vector read_paths ; - boost::split(read_paths, this->file_read, [](char c){return c == ',';}); - std::vector data_read ; - for(const auto& path : read_paths) - { if(path == "") - { continue ; } - data_read.push_back(read_matrix2d_i(path)) ; - } - // sequence data - std::vector data_seq ; - if(this->file_sequence != "") - { data_seq.push_back(read_matrix2d_i(this->file_sequence)) ; } - - EMEngine em(data_read, - data_seq, - this->n_class, - this->n_iter, - this->n_shift, - this->flip, - this->seeding, - this->seed, - this->n_threads) ; + { EMRead em(Matrix2D(this->file_read), + this->n_class, + this->n_iter, + this->n_shift, + this->flip, + this->seed, + this->n_threads) ; em.classify() ; std::cout << em.get_post_prob() << std::endl ; return EXIT_SUCCESS ; } else { return EXIT_FAILURE ; } } -void ChIPPartitioningApplication::parseOptions(int argn, char** argv) +void EMReadApplication::parseOptions(int argn, char** argv) { // no option to parse if(argv == nullptr) { std::string message = "no options to parse!" ; throw std::invalid_argument(message) ; } // help messages std::string desc_msg = "\n" - "ChIPPartitioning is a probabilistic partitioning algorithm that \n" - "sofetly assigns genomic regions to classes given their shape \n" - "of the signal over the region. The assignment probabilities \n" - "are returned through stdout.\n\n" ; + "EMRead is a probabilistic partitioning algorithm that \n" + "sofetly assigns genomic regions to classes given the shape \n" + "of the read density over the region. The assignment \n" + "probabilities are returned through stdout.\n\n" ; std::string opt_help_msg = "Produces this help message." ; std::string opt_thread_msg = "The number of threads dedicated to parallelize the computations,\n " "by default 0 (no parallelization)." 
; - std::string opt_read_msg = "A coma separated list of paths to the file containing the " - "read density data" ; - std::string opt_seq_msg = "The path to the file containing the sequence data" ; + std::string opt_read_msg = "The path to the file containing the read density data" ; std::string opt_iter_msg = "The number of iterations." ; std::string opt_class_msg = "The number of classes to find." ; std::string opt_shift_msg = "Enables this number of column of shifting " - "freedom. By default, shifting is " + "freedom to realign the data. By default, shifting is " "disabled (equivalent to --shift 1)." ; - std::string opt_flip_msg = "Enables flipping."; - std::string opt_seeding_msg = "Specify which method should be used to initialise the " - "cluster references." ; + std::string opt_flip_msg = "Enables flipping to realign the data."; std::string opt_seed_msg = "A value to seed the random number generator."; // option parser boost::program_options::variables_map vm ; boost::program_options::options_description desc(desc_msg) ; std::string seeding_tmp ; desc.add_options() ("help,h", opt_help_msg.c_str()) ("read", po::value(&(this->file_read)), opt_read_msg.c_str()) - ("seq", po::value(&(this->file_sequence)), opt_read_msg.c_str()) ("iter,i", po::value(&(this->n_iter)), opt_iter_msg.c_str()) ("class,c", po::value(&(this->n_class)), opt_class_msg.c_str()) ("shift,s", po::value(&(this->n_shift)), opt_shift_msg.c_str()) ("flip", opt_flip_msg.c_str()) - ("seeding", po::value(&(seeding_tmp)), opt_seeding_msg.c_str()) ("seed", po::value(&(this->seed)), opt_seed_msg.c_str()) ("thread", po::value(&(this->n_threads)), opt_thread_msg.c_str()) ; // parse try { po::store(po::parse_command_line(argn, argv, desc), vm) ; po::notify(vm) ; } catch(std::invalid_argument& e) { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; throw std::invalid_argument(msg) ; } catch(...) 
{ throw std::invalid_argument("An unknown error occured while parsing the options") ; } bool help = vm.count("help") ; // checks unproper option settings if(this->file_read == "" and - this->file_sequence == "" and (not help)) - { std::string msg("Error! No data were given (--read and/or --seq)!") ; - throw std::invalid_argument(msg) ; - } - else if((seeding_tmp != "random") and - (seeding_tmp != "sampling") and - (seeding_tmp != "toy") and - (not help)) - { std::string msg("Error! Unrecognized seeding method (--seeding)!") ; + { std::string msg("Error! No data were given (--read)!") ; throw std::invalid_argument(msg) ; } - // no class given -> 1 iter + // no iter given -> 1 iter if(this->n_iter == 0) { this->n_iter = 1 ; } // no shift class given -> 1 class if(this->n_class == 0) { this->n_class = 1 ; } // no shift given, value of 1 -> no shift if(this->n_shift == 0) { this->n_shift = 1 ; } - // set seeding - if(seeding_tmp == "random") - { this->seeding = EMEngine::seeding_codes::RANDOM ; } - else if(seeding_tmp == "sampling") - { this->seeding = EMEngine::seeding_codes::SAMPLING ; } - else if(seeding_tmp == "toy") - { this->seeding = EMEngine::seeding_codes::TOY ; } // set flip if(vm.count("flip")) { this->flip = true ; } // help invoked, run() cannot be invoked if(help) { std::cout << desc << std::endl ; this->runnable = false ; return ; } // everything fine, run() can be called else { this->runnable = true ; return ; } } - int main(int argn, char** argv) -{ ChIPPartitioningApplication app(argn, argv) ; +{ EMReadApplication app(argn, argv) ; return app.run() ; } diff --git a/src/Applications/ChIPPartitioningApplication.hpp b/src/Applications/EMReadApplication.hpp similarity index 74% copy from src/Applications/ChIPPartitioningApplication.hpp copy to src/Applications/EMReadApplication.hpp index 912b9aa..66cb1be 100644 --- a/src/Applications/ChIPPartitioningApplication.hpp +++ b/src/Applications/EMReadApplication.hpp @@ -1,106 +1,91 @@ -#ifndef 
CHIPPPARTITIONINGAPPLICATION_HPP -#define CHIPPPARTITIONINGAPPLICATION_HPP +#ifndef EMREADAPPLICATION_HPP +#define EMREADAPPLICATION_HPP #include -#include -#include #include /*! - * \brief The ChIPPartitioningApplication class is a wrapper around an EMEngine + * \brief The EMReadApplication class is a wrapper around an EMRead * instance creating an autonomous application to classify data by directly * passing all the options and parameters from the command line. */ -class ChIPPartitioningApplication: public ApplicationInterface +class EMReadApplication: public ApplicationInterface { public: - ChIPPartitioningApplication() = delete ; - ChIPPartitioningApplication(const ChIPPartitioningApplication& app) = delete ; + EMReadApplication() = delete ; + EMReadApplication(const EMReadApplication& app) = delete ; /*! * \brief Constructs an object from the command line * options. * \param argn the number of options passed to the * main() function. * \param argv the vector of options passed to the * main() function. */ - ChIPPartitioningApplication(int argn, char** argv) ; + EMReadApplication(int argn, char** argv) ; /*! * \brief Runs the application. The data are classified * using the given settings and the posterior probability * matrix is returned through the stdout. * The matrix is a 4D matrix with dimensions : * regions, class, shift flip. * \return an exit code EXIT_SUCCESS or EXIT_FAILURE * to return to the OS. */ virtual int run() override ; private: /*! * \brief Parses the program command line options and * sets the object field accordingly. * If the help option is detected, the "runnable" * field is set to false and subsequent calls to * run() will produce nothing. * \param argn the number of options passed to the * main() function. * \param argv the vector of options passed to the * main() function. * \throw std::invalid_argument if an error is found * in the program options. */ void parseOptions(int argn, char** argv) ; /*! 
- * \brief the paths to the files containing the read + * \brief the paths to the file containing the read * density data. */ std::string file_read ; - /*! - * \brief the path to the file containing the - * sequence data. - */ - std::string file_sequence ; /*! * \brief the number of classes to partition the data into. */ size_t n_class ; /*! * \brief the number of iterations allowed. */ size_t n_iter ; /*! * \brief the shifting freedom. */ size_t n_shift ; /*! * \brief whether flipping freedom is allowed. */ bool flip ; - /*! * \brief the number of threads. */ size_t n_threads ; - - /*! - * \brief the seeding method to use. - */ - EMEngine::seeding_codes seeding ; /*! * \brief a seed to initialise the random number generator. */ std::string seed ; - /*! * \brief a flag indicating whether the core of run() can be * run or not. */ bool runnable ; } ; - -#endif // CHIPPPARTITIONINGAPPLICATION_HPP +#endif // EMREADAPPLICATION_HPP diff --git a/src/Applications/EMSequenceApplication.cpp b/src/Applications/EMSequenceApplication.cpp new file mode 100644 index 0000000..1d10824 --- /dev/null +++ b/src/Applications/EMSequenceApplication.cpp @@ -0,0 +1,278 @@ + +#include +#include + +#include +#include +#include // std::invalid_argument +#include +#include // boost::split() + +#include +#include + +namespace po = boost::program_options ; + +template +std::ostream& operator << (std::ostream& stream, + const std::vector& v) +{ for(const auto& x : v) + { stream << x << " " ; } + return stream ; +} + +EMSequenceApplication::EMSequenceApplication(int argn, char** argv) + : file_seq(""), files_motif(""), + n_class(0), n_iter(0), n_shift(0), flip(false), bckg_class(false), + n_threads(0), seed(""), runnable(true) +{ + // parse command line options and set the fields + this->parseOptions(argn, argv) ; +} + +int EMSequenceApplication::run() +{ if(this->runnable) + { EMSequence* em(nullptr) ; + + // data + Matrix2D data(this->file_seq) ; + + // seeds motifs randomly + 
if(this->files_motif == "") + { em = new EMSequence(data, + this->n_class, + this->n_iter, + this->n_shift, + this->flip, + this->bckg_class, + this->seed, + this->n_threads) ; + } + // seeds motifs with the given matrices + else + { // model + std::vector motif_paths ; + boost::split(motif_paths, this->files_motif, [](char c){return c == ',';}) ; + // this->n_class = motif_paths.size() + this->bckg_class ; + size_t model_ncol = data.get_ncol() - this->n_shift + 1 ; + + // add the given motif, random motifs (if needed) and + // background class (if needed) + Matrix3D model = this->init_model(model_ncol, + data, + motif_paths) ; + + em = new EMSequence(data, + model, + this->n_iter, + this->flip, + this->n_threads) ; + } + + // classify + em->classify() ; + std::cout << em->get_post_prob() << std::endl ; + + // clean + delete em ; + em = nullptr ; + + return EXIT_SUCCESS ; + } + else + { return EXIT_FAILURE ; } +} + +void EMSequenceApplication::parseOptions(int argn, char** argv) +{ + // no option to parse + if(argv == nullptr) + { std::string message = "no options to parse!" ; + throw std::invalid_argument(message) ; + } + + // help messages + std::string desc_msg = "\n" + "EMSequence is a probabilistic partitioning algorithm that \n" + "sofetly assigns sequences to classes given their motif content \n" + "The assignment probabilities are returned through stdout.\n\n" ; + std::string opt_help_msg = "Produces this help message." ; + std::string opt_thread_msg = "The number of threads dedicated to parallelize the computations,\n " + "by default 0 (no parallelization)." ; + std::string opt_seq_msg = "The path to the file containing the sequences" ; + std::string opt_motifs_msg = "A coma separated list of path to files containing the initial motifs\n" + "values. The motifs should be probability matrices in horizontal format.\n" + "If the motifs are too short after accounting for shifting, extra\n" + "columns with uniform probabilities will be added on each side. 
The\n" + "given number of classes (--class) should at least be the number of\n" + "initial motifs. If the number of classes is bigger than the number of" + "given motifs, the remaining classes are initialised randomly\n." ; + std::string opt_iter_msg = "The number of iterations." ; + std::string opt_class_msg = "The number of classes to find." ; + std::string opt_shift_msg = "Enables this number of column of shifting freedom to realign\n" + "the data. By default, shifting is disabled (equivalent to\n" + "--shift 1)." ; + std::string opt_flip_msg = "Enables flipping to realign the data."; + std::string opt_bckg_msg = "Adds a class to model the sequence background. This class\n" + "contains the sequence background probabilities at each position\n" + "and is never updated." ; + std::string opt_seed_msg = "A value to seed the random number generator."; + + // option parser + boost::program_options::variables_map vm ; + boost::program_options::options_description desc(desc_msg) ; + + std::string seeding_tmp ; + + desc.add_options() + ("help,h", opt_help_msg.c_str()) + + ("seq", po::value(&(this->file_seq)), opt_seq_msg.c_str()) + + ("motifs", po::value(&(this->files_motif)), opt_motifs_msg.c_str()) + + ("iter,i", po::value(&(this->n_iter)), opt_iter_msg.c_str()) + ("class,c", po::value(&(this->n_class)), opt_class_msg.c_str()) + ("shift,s", po::value(&(this->n_shift)), opt_shift_msg.c_str()) + ("flip", opt_flip_msg.c_str()) + ("bgclass", opt_bckg_msg.c_str()) + + ("seed", po::value(&(this->seed)), opt_seed_msg.c_str()) + ("thread", po::value(&(this->n_threads)), opt_thread_msg.c_str()) ; + + // parse + try + { po::store(po::parse_command_line(argn, argv, desc), vm) ; + po::notify(vm) ; + } + catch(std::invalid_argument& e) + { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; + throw std::invalid_argument(msg) ; + } + catch(...) 
+ { throw std::invalid_argument("An unknown error occured while parsing the options") ; } + + bool help = vm.count("help") ; + + // checks unproper option settings + if(this->file_seq == "" and + (not help)) + { std::string msg("Error! No data were given (--seq)!") ; + throw std::invalid_argument(msg) ; + } + + // no iter given -> 1 iter + if(this->n_iter == 0) + { this->n_iter = 1 ; } + // no shift class given -> 1 class + if(this->n_class == 0) + { this->n_class = 1 ; } + // no shift given, value of 1 -> no shift + if(this->n_shift == 0) + { this->n_shift = 1 ; } + // set flip + if(vm.count("flip")) + { this->flip = true ; } + // set background class + if(vm.count("bgclass")) + { this->bckg_class = true ; } + // help invoked, run() cannot be invoked + if(help) + { std::cout << desc << std::endl ; + this->runnable = false ; + return ; + } + // everything fine, run() can be called + else + { this->runnable = true ; + return ; + } +} + +Matrix3D EMSequenceApplication::init_model(size_t model_len, + const Matrix2D& data, + const std::vector& motif_paths) const +{ + int n_class_given = motif_paths.size() ; + int n_class_bckg = this->bckg_class ; + int n_class_rand = this->n_class - n_class_given - n_class_bckg ; + + // number of classes should at least be number of motifs + if(n_class_given > (int)this->n_class) + { char msg[4096] ; + sprintf(msg, "Error! number of class given (--class %zu) should at " + "least be equal to number of motifs (--motifs %d)", + this->n_class, n_class_given) ; + throw std::invalid_argument(msg) ; + } + // check if there is room for a background class + if((int)this->n_class < n_class_given+this->bckg_class) + { char msg[4096] ; + sprintf(msg, "Error! 
no class left to add a background " + "class (--bgclass) with the given motifs (--motifs) (--class %zu)", + this->n_class) ; + throw std::invalid_argument(msg) ; + } + + // init empty model + Matrix3D model(this->n_class, + model_len, + 4, + 0.25) ; + // add given motifs + for(size_t i=0; i matrix(motif_paths[i]) ; + // motif is too big for this shift + if(matrix.get_ncol() > model_len) + { char msg[4096] ; + sprintf(msg, + "Error! In %s, motif column number is bigger " + "than data column number - shift + 1 " + "(%zu > %zu - %zu + 1)", + motif_paths[i].c_str(), + matrix.get_ncol(), + data.get_ncol(), + this->n_shift) ; + throw std::invalid_argument(msg) ; + } + // insert motif in middle of matrix + else + { // size_t j_model = this->n_shift / 2 ; + size_t j_model = (model_len - matrix.get_ncol()) / 2 ; + for(size_t j_mat=0, j_mod=j_model; j_mat 0) + { // initialise randomly + EMSequence em(data, + n_class_rand, + this->n_iter, + this->n_shift, + this->flip, + this->bckg_class, + this->seed, + this->n_threads) ; + Matrix3D model_rand = em.get_sequence_models() ; + // copy them into model + for(int i_rand=0, i_mod=n_class_given; i_rand -#include -#include #include +#include + +#include +#include /*! - * \brief The ChIPPartitioningApplication class is a wrapper around an EMEngine - * instance creating an autonomous application to classify data by directly + * \brief The EMSequenceApplication class is a wrapper around an EMSequence + * instance creating an autonomous application to classify sequences by directly * passing all the options and parameters from the command line. */ -class ChIPPartitioningApplication: public ApplicationInterface +class EMSequenceApplication: public ApplicationInterface { public: - ChIPPartitioningApplication() = delete ; - ChIPPartitioningApplication(const ChIPPartitioningApplication& app) = delete ; + EMSequenceApplication() = delete ; + EMSequenceApplication(const EMSequenceApplication& app) = delete ; /*! 
* \brief Constructs an object from the command line * options. * \param argn the number of options passed to the * main() function. * \param argv the vector of options passed to the * main() function. */ - ChIPPartitioningApplication(int argn, char** argv) ; + EMSequenceApplication(int argn, char** argv) ; /*! * \brief Runs the application. The data are classified * using the given settings and the posterior probability * matrix is returned through the stdout. * The matrix is a 4D matrix with dimensions : * regions, class, shift flip. * \return an exit code EXIT_SUCCESS or EXIT_FAILURE * to return to the OS. */ virtual int run() override ; private: /*! * \brief Parses the program command line options and * sets the object field accordingly. * If the help option is detected, the "runnable" * field is set to false and subsequent calls to * run() will produce nothing. * \param argn the number of options passed to the * main() function. * \param argv the vector of options passed to the * main() function. * \throw std::invalid_argument if an error is found * in the program options. */ void parseOptions(int argn, char** argv) ; /*! - * \brief the paths to the files containing the read - * density data. + * \brief Initialise the class models if matrices + * are given as initial class motifs. + * If the given class motifs are shorter than the + * model after accounting for shifting, extra columns + * with uniform probabilities will be added on each + * side. + * If the number of classes is higher than the + * number of given motifs, extra classes will be + * initialised randomly.A background class is included + * if needed. + * \param model_len the number of positions (columns) + * of the model to initialise. + * \param data the sequence matrix, in integer format. + * \param motif_paths the paths to the files containing + * the probability matrices to use to initialise the + * class motifs. 
+ * \return */ - std::string file_read ; + Matrix3D init_model(size_t model_len, + const Matrix2D& data, + const std::vector& motif_paths) const ; + /*! - * \brief the path to the file containing the - * sequence data. + * \brief the paths to the file containing the sequence + * data. */ - std::string file_sequence ; + std::string file_seq ; + + /*! + * \brief a coma separated list of files containing the + * initial motif matrices. + */ + std::string files_motif ; + /*! * \brief the number of classes to partition the data into. */ size_t n_class ; /*! * \brief the number of iterations allowed. */ size_t n_iter ; /*! * \brief the shifting freedom. */ size_t n_shift ; /*! * \brief whether flipping freedom is allowed. */ bool flip ; - /*! - * \brief the number of threads. + * \brief whether a constant class to model the + * sequence background should be added. This + * class has the sequence background probabilities + * at each position. */ - size_t n_threads ; - + bool bckg_class ; /*! - * \brief the seeding method to use. + * \brief the number of threads. */ - EMEngine::seeding_codes seeding ; + size_t n_threads ; /*! * \brief a seed to initialise the random number generator. */ std::string seed ; - /*! * \brief a flag indicating whether the core of run() can be * run or not. 
*/ bool runnable ; } ; - -#endif // CHIPPPARTITIONINGAPPLICATION_HPP +#endif // EMSEQUENCEAPPLICATION_HPP diff --git a/src/Applications/ProbToModelApplication.cpp b/src/Applications/ProbToModelApplication.cpp index f9e84aa..babc0ef 100644 --- a/src/Applications/ProbToModelApplication.cpp +++ b/src/Applications/ProbToModelApplication.cpp @@ -1,203 +1,206 @@ #include #include #include #include #include // std::invalid_argument, std::runtime_error #include #include #include -#include +#include +#include namespace po = boost::program_options ; +typedef std::vector vector_d ; + ProbToModelApplication::ProbToModelApplication(int argn, char** argv) : file_read(""), file_seq(""), file_prob(""), n_threads(0), runnable(false) { this->parseOptions(argn, argv) ; } ProbToModelApplication::~ProbToModelApplication() {} int ProbToModelApplication::run() { if(this->runnable) { // load data std::string file_data ; bool read_data = false ; bool seq_data = false ; if(this->file_read != "") { file_data = this->file_read ; read_data = true ; seq_data = false ; } else if(this->file_seq != "") { file_data = this->file_seq ; read_data = false ; seq_data = true ; } else { std::string msg("Error! Could not determine the type of the data!") ; throw std::runtime_error(msg) ; } - matrix2d_i data = read_matrix2d_i(file_data) ; - matrix4d_d prob = read_matrix4d_d(this->file_prob) ; - if(data.size() != prob.size()) + Matrix2D data(file_data) ; + Matrix4D prob(this->file_prob) ; + if(data.get_nrow() != prob.get_dim()[0]) { char msg[4096] ; sprintf(msg, "Error! data and prob matrices have unequal " "row numbers (%zu / %zu)!", - data.size(), prob.size()) ; + data.get_nrow(), prob.get_dim()[0]) ; throw std::runtime_error(msg) ; } - else if(data[0].size() < prob[0][0].size()) + else if(data.get_ncol() < prob.get_dim()[2]) { char msg[4096] ; sprintf(msg, "Error! too many shift states for the data!" 
"%zu shift states and %zu columns in data)!", - prob[0][0].size(), data[0].size()) ; + prob.get_dim()[2], data.get_ncol()) ; throw std::runtime_error(msg) ; } // get the data model ModelComputer* ptr = nullptr ; if(read_data) { ptr = new ReadModelComputer(data, prob, this->n_threads) ; } else if(seq_data) { ptr = new SequenceModelComputer(data, prob, this->n_threads) ; } - matrix2d_d model = ptr->get_model() ; + Matrix2D model = ptr->get_model() ; delete ptr ; ptr = nullptr ; // compute the class prob - size_t n_row = prob.size() ; - size_t n_class = prob[0].size() ; - size_t n_shift = prob[0][0].size() ; - size_t n_flip = prob[0][0][0].size() ; + size_t n_row = prob.get_dim()[0] ; + size_t n_class = prob.get_dim()[1] ; + size_t n_shift = prob.get_dim()[2] ; + size_t n_flip = prob.get_dim()[3] ; vector_d class_prob(n_class, 0.) ; double p_tot = 0. ; for(size_t i=0; i model_final(model.get_nrow(), + model.get_ncol() + 1) ; // 1st column contain the class prob if(read_data) - { for(size_t i=0; i(&(this->file_read)), opt_read_msg.c_str()) ("seq,", po::value(&(this->file_seq)), opt_seq_msg.c_str()) ("prob,", po::value(&(this->file_prob)), opt_prob_msg.c_str()) ("thread", po::value(&(this->n_threads)), opt_thread_msg.c_str()) ; // parse try { po::store(po::parse_command_line(argn, argv, desc), vm) ; po::notify(vm) ; } catch(std::invalid_argument& e) { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; throw std::invalid_argument(msg) ; } catch(...) { throw std::invalid_argument("An unknown error occured while parsing the options") ; } bool help = vm.count("help") ; // checks unproper option settings if((this->file_read == "") and (this->file_seq == "") and (not help)) { std::string msg("Error! No data file was given (--read or --seq)!") ; throw std::invalid_argument(msg) ; } else if((this->file_read != "") and (this->file_seq != "") and (not help)) { std::string msg("Error! 
--read and --seq are mutually exclusive!") ; throw std::invalid_argument(msg) ; } else if(this->file_prob == "" and (not help)) { std::string msg("Error! No posterior probabily file was given (--prob)!") ; throw std::invalid_argument(msg) ; } // help invoked, run() cannot be invoked if(help) { std::cout << desc << std::endl ; this->runnable = false ; return ; } // everything fine, run() can be called else { this->runnable = true ; return ; } } int main(int argn, char** argv) { ProbToModelApplication app(argn, argv) ; return app.run() ; } diff --git a/src/Applications/ReadModelExtenderApplication.cpp b/src/Applications/ReadModelExtenderApplication.cpp new file mode 100644 index 0000000..c4b0545 --- /dev/null +++ b/src/Applications/ReadModelExtenderApplication.cpp @@ -0,0 +1,269 @@ +#include + +#include +#include +#include +#include // std::invalid_argument, std::runtime_error + +#include +#include +#include +#include +#include + +namespace po = boost::program_options ; + + +// the valid values for --method option +std::string method_read = "read" ; +std::string method_read_atac = "read_atac" ; +std::string method_fragment = "fragment" ; +std::string method_fragment_center = "fragment_center" ; + + +ReadModelExtenderApplication::ReadModelExtenderApplication(int argn, char** argv) + : file_bed(""), file_bam(""), file_bai(""), file_prob(""), + from(0), to(0), ext(0), bin_size(0), + method(CorrelationMatrixCreator::FRAGMENT), + n_threads(0), runnable(false) +{ this->parseOptions(argn, argv) ; } + +ReadModelExtenderApplication::~ReadModelExtenderApplication() +{} + +int ReadModelExtenderApplication::run() +{ if(this->runnable) + { // extend limits + int ext_right = this->ext/2 ; + int ext_left = this->ext - ext_right ; + this->from -= ext_left ; + this->to += ext_right ; + + // create extended matrix + CorrelationMatrixCreator mc(this->file_bed, + this->file_bam, + this->file_bai, + this->from, + this->to, + this->bin_size, + this->method) ; + Matrix2D data = 
mc.create_matrix() ; + + // compute model + Matrix4D prob(this->file_prob) ; + if(prob.get_dim()[0] != data.get_nrow()) + { char msg[4096] ; + sprintf(msg, + "Error! data matrix and probability matrix have " + "unequal row numbers (%zu and %zu)", + prob.get_dim()[0], + data.get_nrow()) ; + throw std::invalid_argument(msg) ; + } + size_t n_row = prob.get_dim()[0] ; + size_t n_class = prob.get_dim()[1] ; + size_t n_shift = prob.get_dim()[2] ; + size_t n_flip = prob.get_dim()[3] ; + + ReadModelComputer model_cp(data, prob, this->n_threads) ; + Matrix2D model = model_cp.get_model() ; + + // compute class prob + vector_d class_prob(n_class, 0.) ; + double p_tot = 0. ; + for(size_t i=0; i model_final(model.get_nrow(), + model.get_ncol() + 1) ; + // 1st column contain the class prob + for(size_t i=0; i(&(this->file_bed)), opt_bed_msg.c_str()) + ("bam", po::value(&(this->file_bam)), opt_bam_msg.c_str()) + ("bai", po::value(&(this->file_bai)), opt_bai_msg.c_str()) + ("prob,", po::value(&(this->file_prob)), opt_prob_msg.c_str()) + + ("from", po::value(&(this->from)), opt_from_msg.c_str()) + ("to", po::value(&(this->to)), opt_to_msg.c_str()) + ("ext", po::value(&(this->ext)), opt_ext_msg.c_str()) + ("binSize", po::value(&(this->bin_size)), opt_binsize_msg.c_str()) + ("method", po::value(&(method)), opt_method_msg.c_str()) + + ("thread", po::value(&(this->n_threads)), opt_thread_msg.c_str()) ; + + // parse + try + { po::store(po::parse_command_line(argn, argv, desc), vm) ; + po::notify(vm) ; + } + catch(std::invalid_argument& e) + { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; + throw std::invalid_argument(msg) ; + } + catch(...) + { throw std::invalid_argument("An unknown error occured while parsing the options") ; } + + bool help = vm.count("help") ; + + // checks unproper option settings + if(this->file_bed == "" and (not help)) + { std::string msg("Error! 
No BED file was given (--bed)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->file_bam == "" and (not help)) + { std::string msg("Error! No BAM file was given (--bam)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->file_bai == "" and (not help)) + { std::string msg("Error! No BAM index file was given (--bai)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->file_prob == "" and (not help)) + { std::string msg("Error! No posterior probability file was given (--prob)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->from == 0 and this->to == 0 and (not help)) + { std::string msg("Error! No range given (--from and --to)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->from >= this->to and (not help)) + { std::string msg("Error! from shoud be smaller than to (--from and --to)!") ; + throw std::invalid_argument(msg) ; + } + else if(ext <= 0 and (not help)) + { std::string msg("Error! the number of columns to add should be > 0 (--ext)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->bin_size <= 0 and (not help)) + { std::string msg("Error! bin size should be bigger than 0 (--binSize)!") ; + throw std::invalid_argument(msg) ; + } + else if(method != method_read and + method != method_read_atac and + method != method_fragment and + method != method_fragment_center) + { char msg[4096] ; + sprintf(msg, "Error! 
method should be %s, %s, %s or %s (--method)", + method_read.c_str(), + method_read_atac.c_str(), + method_fragment.c_str(), + method_fragment_center.c_str()) ; + throw std::invalid_argument(msg) ; + } + + // set method + if(method == method_read) + { this->method = CorrelationMatrixCreator::READ ; } + else if(method == method_read_atac) + { this->method = CorrelationMatrixCreator::READ_ATAC ; } + else if(method == method_fragment) + { this->method = CorrelationMatrixCreator::FRAGMENT ; } + else if(method == method_fragment_center) + { this->method = CorrelationMatrixCreator::FRAGMENT_CENTER ; } + + // help invoked, run() cannot be invoked + if(help) + { std::cout << desc << std::endl ; + this->runnable = false ; + return ; + } + // everything fine, run() can be called + else + { this->runnable = true ; + return ; + } +} + +int main(int argn, char** argv) +{ ReadModelExtenderApplication app(argn, argv) ; + return app.run() ; +} diff --git a/src/Applications/ReadModelExtenderApplication.hpp b/src/Applications/ReadModelExtenderApplication.hpp new file mode 100644 index 0000000..5fc8379 --- /dev/null +++ b/src/Applications/ReadModelExtenderApplication.hpp @@ -0,0 +1,122 @@ +#ifndef READMODELEXTENDERAPPLICATION_HPP +#define READMODELEXTENDERAPPLICATION_HPP + +#include + +#include +#include + +#include + +/*! + * \brief The ReadModelExtenderApplication class is a class implementing an + * application to extend a read model of length L' (L' = L - S + 1 + * where L is the number of column of the data matrix and S the + * shifting freedom allowed during the classification) to a new model + * length L'' = L' + E (E is the number of columns to add to the + * model) given the data matrix and the results of the classification + * (posterior probability matrix). + * To do this, the read count matrix from which the original model + * was computed is extended (0.5*E columns on each side) and a model + * is computed using the new matrix and the given posterior probabities. 
+ * The extended model is returned through the stdout. + */ +class ReadModelExtenderApplication : public ApplicationInterface +{ + public: + ReadModelExtenderApplication() = delete ; + ReadModelExtenderApplication(const ReadModelExtenderApplication& app) = delete ; + /*! + * \brief Constructs an object from the command line + * options. + * \param argn the number of options passed to the + * main() function. + * \param argv the vector of options passed to the + * main() function. + */ + ReadModelExtenderApplication(int argn, char** argv) ; + /*! + * \brief Destructor. + */ + virtual ~ReadModelExtenderApplication() override ; + /*! + * \brief Runs the application. The data new model + * is computed and displayed through the + * stdout. + * \return the exit code. + */ + virtual int run() override ; + + private: + /*! + * \brief Parses the program command line options and + * sets the object field accordingly. + * If the help option is detected, the "runnable" + * field is set to false and subsequent calls to + * run() will produce nothing. + * \param argn the number of options passed to the + * main() function. + * \param argv the vector of options passed to the + * main() function. + * \throw std::invalid_argument if an error is found + * in the program options. + */ + void parseOptions(int argn, char** argv) ; + + /*! + * \brief the path to the bed file. + */ + std::string file_bed ; + /*! + * \brief the path to the bam file. + */ + std::string file_bam ; + /*! + * \brief the path to the bam index file. + */ + std::string file_bai ; + /*! + * \brief the path to the file containing the + * classification posterior probabilities. + */ + std::string file_prob ; + /*! + * \brief a relative coordinate indicating the + * most downstream position to consider around + * each region in the bed file. + */ + int from ; + /*! + * \brief a relative coordinate indicating the + * most upstream position to consider around + * each region in the bed file. + */ + int to ; + /*! 
+ * \brief the number of columns to add to the + * matrix (half of this value on each side). + */ + int ext ; + /*! + * \brief the size of the bin that will be used + * to bin the signal in the regions [from,to] around + * each region in the bed file. + */ + int bin_size ; + /*! + * \brief How to consider the sequenced fragments when computing + * the bin values. + */ + CorrelationMatrixCreator::methods method ; + + /*! + * \brief the number of threads. + */ + size_t n_threads ; + /*! + * \brief whether run() can be called. + */ + bool runnable ; +} ; + +#endif // READMODELEXTENDERAPPLICATION_HPP diff --git a/src/Applications/SequenceModelExtenderApplication.cpp b/src/Applications/SequenceModelExtenderApplication.cpp new file mode 100644 index 0000000..03ba059 --- /dev/null +++ b/src/Applications/SequenceModelExtenderApplication.cpp @@ -0,0 +1,212 @@ +#include + +#include +#include +#include +#include // std::invalid_argument, std::runtime_error + +#include +#include +#include +#include +#include + +namespace po = boost::program_options ; + + +SequenceModelExtenderApplication::SequenceModelExtenderApplication(int argn, char** argv) + : file_bed(""), file_fasta(""), file_prob(""), + from(0), to(0), ext(0), + n_threads(0), runnable(false) +{ this->parseOptions(argn, argv) ; } + +SequenceModelExtenderApplication::~SequenceModelExtenderApplication() +{} + +int SequenceModelExtenderApplication::run() +{ if(this->runnable) + { // extend limits + int ext_right = this->ext/2 ; + int ext_left = this->ext - ext_right ; + this->from -= ext_left ; + this->to += ext_right ; + + // create extended matrix + SequenceMatrixCreator mc(this->file_bed, + this->file_fasta, + this->from, + this->to) ; + Matrix2D data = mc.create_matrix() ; + + // compute model + Matrix4D prob(this->file_prob) ; + if(prob.get_dim()[0] != data.get_nrow()) + { char msg[4096] ; + sprintf(msg, + "Error! 
data matrix and probability matrix have " + "unequal row numbers (%zu and %zu)", + prob.get_dim()[0], + data.get_nrow()) ; + throw std::invalid_argument(msg) ; + } + size_t n_row = prob.get_dim()[0] ; + size_t n_class = prob.get_dim()[1] ; + size_t n_shift = prob.get_dim()[2] ; + size_t n_flip = prob.get_dim()[3] ; + + SequenceModelComputer model_cp(data, prob, this->n_threads) ; + Matrix2D model = model_cp.get_model() ; + + // compute class prob + vector_d class_prob(n_class, 0.) ; + double p_tot = 0. ; + for(size_t i=0; i model_final(model.get_nrow(), + model.get_ncol() + 1) ; + // 1st column contain the class prob + size_t i_class = 0 ; + for(size_t i=0; i(&(this->file_bed)), opt_bed_msg.c_str()) + ("fasta", po::value(&(this->file_fasta)), opt_fasta_msg.c_str()) + ("prob,", po::value(&(this->file_prob)), opt_prob_msg.c_str()) + + ("from", po::value(&(this->from)), opt_from_msg.c_str()) + ("to", po::value(&(this->to)), opt_to_msg.c_str()) + ("ext", po::value(&(this->ext)), opt_ext_msg.c_str()) + + ("thread", po::value(&(this->n_threads)), opt_thread_msg.c_str()) ; + + // parse + try + { po::store(po::parse_command_line(argn, argv, desc), vm) ; + po::notify(vm) ; + } + catch(std::invalid_argument& e) + { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; + throw std::invalid_argument(msg) ; + } + catch(...) + { throw std::invalid_argument("An unknown error occured while parsing the options") ; } + + bool help = vm.count("help") ; + + // checks unproper option settings + if(this->file_bed == "" and (not help)) + { std::string msg("Error! No BED file was given (--bed)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->file_fasta == "" and (not help)) + { std::string msg("Error! No fasta file was given (--fasta)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->file_prob == "" and (not help)) + { std::string msg("Error! 
No posterior probability file was given (--prob)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->from == 0 and this->to == 0 and (not help)) + { std::string msg("Error! No range given (--from and --to)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->from >= this->to and (not help)) + { std::string msg("Error! from shoud be smaller than to (--from and --to)!") ; + throw std::invalid_argument(msg) ; + } + else if(ext <= 0 and (not help)) + { std::string msg("Error! the number of columns to add should be > 0 (--ext)!") ; + throw std::invalid_argument(msg) ; + } + + // help invoked, run() cannot be invoked + if(help) + { std::cout << desc << std::endl ; + this->runnable = false ; + return ; + } + // everything fine, run() can be called + else + { this->runnable = true ; + return ; + } +} + +int main(int argn, char** argv) +{ SequenceModelExtenderApplication app(argn, argv) ; + return app.run() ; +} diff --git a/src/Applications/SequenceModelExtenderApplication.hpp b/src/Applications/SequenceModelExtenderApplication.hpp new file mode 100644 index 0000000..6bc00d7 --- /dev/null +++ b/src/Applications/SequenceModelExtenderApplication.hpp @@ -0,0 +1,107 @@ +#ifndef SEQUENCEMODELEXTENDERAPPLICATION_HPP +#define SEQUENCEMODELEXTENDERAPPLICATION_HPP + +#include + +#include +#include + +#include + +/*! + * \brief The SequenceModelExtenderApplication class is a class implementing an + * application to extend a sequence model of length L' (L' = L - S + 1 + * where L is the number of column of the sequence matrix and S the + * shifting freedom allowed during the classification) to a new model + * length L'' = L' + E (E is the number of columns to add to the + * model) given the data matrix and the results of the classification + * (posterior probability matrix). 
+ * To do this, the sequence count matrix from which the original model + * was computed is extended (0.5*E columns on each side) and a model + * is computed using the new matrix and the given posterior probabities. + * The extended model is returned through the stdout. + */ +class SequenceModelExtenderApplication : public ApplicationInterface +{ + public: + SequenceModelExtenderApplication() = delete ; + SequenceModelExtenderApplication(const SequenceModelExtenderApplication& app) = delete ; + /*! + * \brief Constructs an object from the command line + * options. + * \param argn the number of options passed to the + * main() function. + * \param argv the vector of options passed to the + * main() function. + */ + SequenceModelExtenderApplication(int argn, char** argv) ; + /*! + * \brief Destructor. + */ + virtual ~SequenceModelExtenderApplication() override ; + /*! + * \brief Runs the application. The data new model + * is computed and displayed through the + * stdout. + * \return the exit code. + */ + virtual int run() override ; + + private: + /*! + * \brief Parses the program command line options and + * sets the object field accordingly. + * If the help option is detected, the "runnable" + * field is set to false and subsequent calls to + * run() will produce nothing. + * \param argn the number of options passed to the + * main() function. + * \param argv the vector of options passed to the + * main() function. + * \throw std::invalid_argument if an error is found + * in the program options. + */ + void parseOptions(int argn, char** argv) ; + + /*! + * \brief the path to the bed file. + */ + std::string file_bed ; + /*! + * \brief the path to the fasta file + * containing the sequences. + */ + std::string file_fasta ; + /*! + * \brief the path to the file containing the + * classification posterior probabilities. + */ + std::string file_prob ; + /*! 
+ * \brief a relative coordinate indicating the + * most downstream position to consider around + * each region in the bed file. + */ + int from ; + /*! + * \brief a relative coordinate indicating the + * most upstream position to consider around + * each region in the bed file. + */ + int to ; + /*! + * \brief the number of columns to add to the + * matrix (half of this value on each side). + */ + int ext ; + /*! + * \brief the number of threads. + */ + size_t n_threads ; + /*! + * \brief whether run() can be called. + */ + bool runnable ; +} ; + +#endif // SEQUENCEMODELEXTENDERAPPLICATION_HPP diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9c38267..6926729 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,98 +1,122 @@ # compiler options add_compile_options(-std=c++14) add_compile_options(-O3) add_compile_options(-Wall) add_compile_options(-Wextra) add_compile_options(-Werror) add_compile_options(-Wfatal-errors) add_compile_options(-pedantic) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SEQAN_CXX_FLAGS}") add_definitions (${SEQAN_DEFINITIONS}) # include file location -include_directories (${SEQAN_INCLUDE_DIRS}) +include_directories(${Boost_INCLUDE_DIRS}) +include_directories(${SEQAN_INCLUDE_DIRS}) include_directories("${scATACseq_SOURCE_DIR}/src/Matrix") include_directories("${scATACseq_SOURCE_DIR}/src/Clustering") include_directories("${scATACseq_SOURCE_DIR}/src/Random") include_directories("${scATACseq_SOURCE_DIR}/src/Parallel") include_directories("${scATACseq_SOURCE_DIR}/src/Statistics") include_directories("${scATACseq_SOURCE_DIR}/src/GUI") include_directories("${scATACseq_SOURCE_DIR}/src/Applications") include_directories("${scATACseq_SOURCE_DIR}/src/Matrix") include_directories("${scATACseq_SOURCE_DIR}/src/GenomicTools") include_directories("${scATACseq_SOURCE_DIR}/src/Utility") - # compile modules into static libraries ## set output directory set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/lib") ## build instructions 
add_library(Clustering "Clustering/DataLayer.cpp" "Clustering/ReadLayer.cpp" "Clustering/SequenceLayer.cpp" - "Clustering/EMEngine.cpp" "Clustering/ModelComputer.cpp" "Clustering/ReadModelComputer.cpp" - "Clustering/SequenceModelComputer.cpp") + "Clustering/SequenceModelComputer.cpp" + "Clustering/EMBase.cpp" + "Clustering/EMRead.cpp" + "Clustering/EMSequence.cpp" + "Clustering/EMJoint.cpp") add_library(Random "Random/Random.cpp" "Random/RandomNumberGenerator.cpp") add_library(Parallel "Parallel/ThreadPool.cpp") add_library(Statistics "Statistics/Statistics.cpp") add_library(GUI "GUI/ConsoleProgressBar.cpp" "GUI/Diplayable.cpp" "GUI/Updatable.cpp") add_library(GenomicTools "GenomicTools/MatrixCreator.cpp" + "GenomicTools/ReadMatrixCreator.cpp" "GenomicTools/CorrelationMatrixCreator.cpp" + "GenomicTools/SequenceMatrixCreator.cpp" "GenomicTools/GenomeRegion.cpp") -add_library(Utility "Utility/matrices.cpp") +add_library(Utility "Utility/matrices.cpp" + "Utility/dna_utility.cpp") ## resolve dependencies -target_link_libraries(Clustering Random Statistics GUI Parallel ${SEQAN_LIBRARIES}) -target_link_libraries(Parallel Threads::Threads) -target_link_libraries(GenomicTools ${SEQAN_LIBRARIES}) +target_link_libraries(Utility ${SEQAN_LIBRARIES}) +target_link_libraries(Clustering Utility Random Statistics GUI Parallel ${SEQAN_LIBRARIES}) +target_link_libraries(Parallel Threads::Threads) +target_link_libraries(GenomicTools Utility ${SEQAN_LIBRARIES}) # executables ## a toy for seqan set(EXE_MAIN_SEQAN "main_seqan") add_executable(${EXE_MAIN_SEQAN} "main_seqan.cpp") target_link_libraries(${EXE_MAIN_SEQAN} ${SEQAN_LIBRARIES} GenomicTools Clustering) set_target_properties(${EXE_MAIN_SEQAN} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") ## a toy for correlation matrix set(EXE_MAIN_CORMAT "main_cormat") add_executable(${EXE_MAIN_CORMAT} "main_cormat.cpp") -target_link_libraries(${EXE_MAIN_CORMAT} ${SEQAN_LIBRARIES} GenomicTools) 
+target_link_libraries(${EXE_MAIN_CORMAT} ${SEQAN_LIBRARIES} Utility GenomicTools Random) set_target_properties(${EXE_MAIN_CORMAT} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") -## a toy for EM usage -set(EXE_MAIN_EM "main_em") -add_executable(${EXE_MAIN_EM} "main_em.cpp") -target_link_libraries(${EXE_MAIN_EM} Clustering Utility) -set_target_properties(${EXE_MAIN_EM} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") -## a 2nd toy for EM usage -set(EXE_MAIN_EM2 "main_em2") -add_executable(${EXE_MAIN_EM2} "main_em2.cpp") -target_link_libraries(${EXE_MAIN_EM2} Clustering Utility) -set_target_properties(${EXE_MAIN_EM2} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") ## an application to create a matrix from BED and a BAM file set(EXE_MAIN_BAMMATRIX "CorrelationMatrixCreator") add_executable(${EXE_MAIN_BAMMATRIX} "Applications/CorrelationMatrixCreatorApplication.cpp" "Applications/ApplicationInterface.cpp") -target_link_libraries(${EXE_MAIN_BAMMATRIX} GenomicTools Boost::program_options) +target_link_libraries(${EXE_MAIN_BAMMATRIX} GenomicTools Utility Boost::program_options) set_target_properties(${EXE_MAIN_BAMMATRIX} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") -## an ChIPPartitioning standalone -set(EXE_CHIPPART "ChIPPartitioning") -add_executable(${EXE_CHIPPART} "Applications/ChIPPartitioningApplication.cpp" "Applications/ApplicationInterface.cpp") -target_link_libraries(${EXE_CHIPPART} Clustering Utility Boost::program_options) -set_target_properties(${EXE_CHIPPART} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") -## an executable to compute classes references from the data and the post prob of ChIPPartitioning -set(EXE_PROB2REF "probToModel") +## an application to create a sequence matrix from BED and a fasta file +set(EXE_MAIN_SEQMATRIX "SequenceMatrixCreator") +add_executable(${EXE_MAIN_SEQMATRIX} "Applications/SequenceMatrixCreatorApplication.cpp" 
"Applications/ApplicationInterface.cpp") +target_link_libraries(${EXE_MAIN_SEQMATRIX} GenomicTools Utility Boost::program_options) +set_target_properties(${EXE_MAIN_SEQMATRIX} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") +## an EMRead standalone +set(EXE_EMREAD "EMRead") +add_executable(${EXE_EMREAD} "Applications/EMReadApplication.cpp" "Applications/ApplicationInterface.cpp") +target_link_libraries(${EXE_EMREAD} Clustering Utility Boost::program_options) +set_target_properties(${EXE_EMREAD} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") +## an EMSequence standalone +set(EXE_EMSEQ "EMSequence") +add_executable(${EXE_EMSEQ} "Applications/EMSequenceApplication.cpp" "Applications/ApplicationInterface.cpp") +target_link_libraries(${EXE_EMSEQ} Clustering Utility Boost::program_options) +set_target_properties(${EXE_EMSEQ} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") +## an EMJoint standalone +set(EXE_EMJOINT "EMJoint") +add_executable(${EXE_EMJOINT} "Applications/EMJointApplication.cpp" "Applications/ApplicationInterface.cpp") +target_link_libraries(${EXE_EMJOINT} Clustering Utility Boost::program_options) +set_target_properties(${EXE_EMJOINT} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") +## an executable to compute data models from the data and the post prob of an EM classification +set(EXE_PROB2REF "ProbToModel") add_executable(${EXE_PROB2REF} "Applications/ProbToModelApplication.cpp" "Applications/ApplicationInterface.cpp") target_link_libraries(${EXE_PROB2REF} Clustering Utility Boost::program_options) set_target_properties(${EXE_PROB2REF} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") +## an executable to extend read models from an EM classification +set(EXE_READMODELEXTENDER "ReadModelExtender") +add_executable(${EXE_READMODELEXTENDER} "Applications/ReadModelExtenderApplication.cpp" "Applications/ApplicationInterface.cpp") 
+target_link_libraries(${EXE_READMODELEXTENDER} Clustering GenomicTools Utility Boost::program_options) +set_target_properties(${EXE_READMODELEXTENDER} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") + +## an executable to extend read models from an EM classification +set(EXE_SEQUENCEMODELEXTENDER "SequenceModelExtender") +add_executable(${EXE_SEQUENCEMODELEXTENDER} "Applications/SequenceModelExtenderApplication.cpp" "Applications/ApplicationInterface.cpp") +target_link_libraries(${EXE_SEQUENCEMODELEXTENDER} Clustering GenomicTools Utility Boost::program_options) +set_target_properties(${EXE_SEQUENCEMODELEXTENDER} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") + ## a test suite set(EXE_TESTS "unittests") add_executable(${EXE_TESTS} "unittests.cpp" "Unittests/unittests_matrix.cpp" "Unittests/unittests_genomictools.cpp") target_link_libraries(${EXE_TESTS} ${UNITTEST_LIB} ${SEQAN_LIBRARIES} GenomicTools) set_target_properties(${EXE_TESTS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") diff --git a/src/Clustering.old/ClusteringEngine.cpp b/src/Clustering.old/ClusteringEngine.cpp deleted file mode 100644 index fe69e87..0000000 --- a/src/Clustering.old/ClusteringEngine.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include -#include - -ClusteringEngine::~ClusteringEngine() -{} - diff --git a/src/Clustering.old/ClusteringEngine.hpp b/src/Clustering.old/ClusteringEngine.hpp deleted file mode 100644 index ecfc47b..0000000 --- a/src/Clustering.old/ClusteringEngine.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef CLUSTERINGENGINE_HPP -#define CLUSTERINGENGINE_HPP - -#include - -/*! - * \brief The ClusteringEngine class is an abstract class providing an interface - * to other classes implementing data clustering methods. - */ -class ClusteringEngine -{ - public: - /*! - * \brief The possible exit codes for the cluster method. 
- * 0 the clustering procedure converged, 1 the clustering - * procedure succeeded without converging, 2 the clustering - * failed. - */ - enum exit_codes {CONVERGENCE=0, SUCCESS, FAILURE, NCODE=3} ; - - public: - /*! - * \brief Destructor. - */ - virtual ~ClusteringEngine() ; - - /*! - * \brief Runs the clustering. - * \return an exit code indicating whether how the clustering - * ended. - */ - virtual exit_codes cluster() = 0 ; - -} ; - -#endif // CLUSTERINGENGINE_HPP diff --git a/src/Clustering.old/EMEngine.cpp b/src/Clustering.old/EMEngine.cpp deleted file mode 100644 index 3b574a1..0000000 --- a/src/Clustering.old/EMEngine.cpp +++ /dev/null @@ -1,807 +0,0 @@ -#include -#include -#include -#include -#include -#include // rand_real_uniform(), rand_int_uniform() -#include // getRandomNumberGenerator() -#include // beta_pmf(), poisson_pmf(), normal_pmf(), sd() -#include // ConsoleProgressBar -#include // ThreadPool -#include // log(), exp(), pow() -#include -#include // numeric_limits -#include // uniform_real, variate_generator -#include // future, promise -#include // move() -#include // bind(), ref() - -#include - -EMEngine::EMEngine(const Matrix2D& data, - size_t n_class, - size_t n_iter, - size_t n_shift, - bool flip, - EMEngine::seeding_codes seeding, - const std::string& seed, - size_t n_threads) - : flip(flip), n_iter(n_iter), n_shift(n_shift), n_flip(flip+1), n_class(n_class), - n_row(data.get_nrow()), n_col(data.get_ncol()), l_slice(n_col - n_shift + 1), - seeding_method(seeding), n_threads(n_threads), threads(n_threads) -{ - // initialise random number generator - getRandomGenerator(seed) ; - - // copy the data - this->data = matrix2d_i(this->n_row, v_i(this->n_col)) ; - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_col; j++) - { this->data[i][j] = data(i,j) ; } - } - -} - -EMEngine::~EMEngine() -{ this->threads.join() ; } - -Matrix2D EMEngine::get_references() const -{ - Matrix2D references(this->n_class, this->l_slice, 0.) 
; - for(size_t i=0; in_class; i++) - { for(size_t j=0; jl_slice; j++) - { references(i,j) = this->references[i][j] ; } - } - return references ; - -} - -Matrix4D EMEngine::get_posterior_prob() const -{ Matrix4D post_prob(this->n_row, this->n_class, this->n_shift, this->n_flip, 0.) ; - for(size_t i=0; in_row; i++) - { for(size_t k=0; kn_class; k++) - { for(size_t s=0; sn_shift; s++) - { for(size_t f=0; fn_flip; f++) - { post_prob(i,k,s,f) = this->post_prob[i][k][s][f] ; } - } - } - } - return post_prob ; -} - -/* -// this is the naive way, it is exact but results in Nan, -Nan, -Inf, +Inf -// sometimes... -double EMEngine::get_loglikelihood0() const -{ - double ll = 0 ; - for(size_t i=0; in_row; i++) - { double p_tmp = 0. ; - for(size_t j=0; jn_class; j++) - { for(size_t s=0; sn_shift; s++) - { // slice is [from_fw,to) - // from_dat_fw to_dat_fw [from_dat_fw, to_dat_fw] - // fw |---------->>>----------| - // ----------------------------------> data - // rev |----------<<<----------| [from_dat_rev, to_dat_rev] - // to_dat_rev can be -1 -> int - // to_dat_rev from_dat_rev - - // log likelihood - // --------------- forward --------------- - double lp_fw = 0. ; - int from_dat_fw = s ; - int to_dat_fw = from_dat_fw + this->l_slice - 1 ; - for(int j_dat_fw=from_dat_fw, j_ref_fw=0; - j_dat_fwdata[i][j_dat_fw], - this->references[j][j_ref_fw]* - this->window_mean[i][s])), - EMEngine::p_min_log) ; - lp_fw += lp ; - - p_tmp += exp(lp_fw) * this->class_prob[j][s][flip_states::FORWARD] ; - // --------------- reverse --------------- - if(this->flip) - { double lp_rev = 0. 
; - int from_dat_rev = this->n_col - 1 - s ; - int to_dat_rev = from_dat_rev - (this->l_slice - 1) ; - int shift_rev = this->n_shift - s - 1 ; - for(int j_dat_rev=from_dat_rev, j_ref_fw=0; - j_dat_rev >= to_dat_rev; j_dat_rev--, j_ref_fw++) - { double lp = std::max(log(poisson_pmf(this->data[i][j_dat_rev], - this->references[j][j_ref_fw]* - this->window_mean[i][shift_rev])), - EMEngine::p_min_log) ; - lp_rev += lp ; - } - - p_tmp += exp(lp_rev) * this->class_prob[j][s][flip_states::REVERSE] ; - } - } - } - ll += log(p_tmp) ; - } - return ll ; -} -*/ - -double EMEngine::get_loglikelihood() const -{ - double ll = 0. ; - - // compute all terms needed - for(size_t i=0; in_row; i++) - { double prob_tmp = 0 ; - for(size_t j=0; jn_class; j++) - { std::vector> v3 ; - for(size_t s=0; sn_shift; s++) - { - // slice is [from_fw,to) - // from_dat_fw to_dat_fw [from_dat_fw, to_dat_fw] - // fw |---------->>>----------| - // ----------------------------------> data - // rev |----------<<<----------| [from_dat_rev, to_dat_rev] - // to_dat_rev can be -1 -> int - // to_dat_rev from_dat_rev - - // log likelihood - // --------------- forward --------------- - double lp_fw = 0. ; - int from_dat_fw = s ; - int to_dat_fw = from_dat_fw + this->l_slice - 1 ; - for(int j_dat_fw=from_dat_fw, j_ref_fw=0; - j_dat_fwdata[i][j_dat_fw], - this->references[j][j_ref_fw]* - this->window_mean[i][s]), - EMEngine::p_min)) ; - lp_fw += lp ; - } - double p_fw = this->class_prob[j][s][flip_states::FORWARD] ; - v3.push_back(std::make_pair(lp_fw, p_fw)) ; - - // --------------- reverse --------------- - if(this->flip) - { double lp_rev = 0. 
; - int from_dat_rev = this->n_col - 1 - s ; - int to_dat_rev = from_dat_rev - (this->l_slice - 1) ; - int shift_rev = this->n_shift - s - 1 ; - for(int j_dat_rev=from_dat_rev, j_ref_fw=0; - j_dat_rev >= to_dat_rev; j_dat_rev--, j_ref_fw++) - { double lp = log(std::max(poisson_pmf(this->data[i][j_dat_rev], - this->references[j][j_ref_fw]* - this->window_mean[i][shift_rev]), - EMEngine::p_min)) ; - lp_rev += lp ; - } - double p_rev = this->class_prob[j][s][flip_states::REVERSE] ; - v3.push_back(std::make_pair(lp_rev, p_rev)) ; - } - } - prob_tmp += sum_exp(v3) ; - } - ll += log(prob_tmp) ; - } - return ll ; -} - -double EMEngine::get_aic() const -{ double ll = this->get_loglikelihood() ; - double n_param = ((double) this->n_class * - (double)this->l_slice) + - ((double)this->n_shift * - (double)this->flip+1. * - (double)this->n_class) - 1. ; - // std::cerr << "AIC = " << (2.*n_param) << " - " << ll << std::endl ; - return (2.*n_param) - (2.*ll) ; -} - -ClusteringEngine::exit_codes EMEngine::cluster() -{ size_t bar_update_n = this->n_iter + 1 ; - ConsoleProgressBar bar(std::cerr, bar_update_n, 70, "clustering") ; - - // construct all other required data structures - // mean number of reads per window - this->window_mean = matrix2d_d(this->n_row, v_d(this->n_shift, 0.)) ; - this->compute_window_means() ; - - // the references - this->references = matrix2d_d(this->n_class, - v_d(this->l_slice, 0.)) ; - // log loglikelihood - this->loglikelihood = matrix4d_d(this->n_row, - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - v_d(this->n_flip, 9.)))) ; - this->loglikelihood_max = v_d(this->n_row, 0.) ; - - // posterior prob - this->post_prob = matrix4d_d(this->n_row, - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - v_d(this->n_flip, 0.)))) ; - this->class_prob = matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - v_d(this->n_flip, 0.))) ; - this->class_prob_tot = v_d(this->n_class, 0.) ; - this->post_prob_row = v_d(this->n_row, 0.) 
; - this->post_prob_class = v_d(this->n_class, 0.) ; - this->post_prob_tot = 0. ; - - // seeding - this->seeding(this->seeding_method) ; - bar.update() ; - - // optimize the partition - for(size_t n_iter=0; n_itern_iter; n_iter++) - { - // normalize the references such thjat the mean value, on each - // row, is 1 - this->normalize_references() ; - // E-step - this->compute_loglikelihood() ; - this->compute_post_prob() ; - // M-step - this->compute_class_prob() ; - this->compute_references() ; - this->center_shifts() ; - // bar.update() ; - } - bar.update() ; std::cerr << std::endl ; - return ClusteringEngine::exit_codes::SUCCESS ; -} - -void EMEngine::normalize_references() -{ - for(size_t i=0; in_class; i++) - { double mean = 0. ; - for(size_t j=0; jl_slice; j++) - { mean += this->references[i][j] ; } - mean /= this->l_slice ; - for(size_t j=0; jl_slice; j++) - { this->references[i][j] /= mean ; } - } -} - -void EMEngine::seeding(EMEngine::seeding_codes seeding) -{ - if(seeding == EMEngine::seeding_codes::RANDOM) - { this->seeding_random() ; } - else if(seeding == EMEngine::seeding_codes::SAMPLING) - { this->seeding_sampling() ; } - else if(seeding == EMEngine::seeding_codes::TOY) - { this->seeding_toy() ; } -} - -void EMEngine::seeding_random() -{ - // get random values from a beta distribution cannot be done using boost so - // i) generate random number [0,1] x - // ii) compute f(x) where f is beta distribution - - matrix2d_d prob(this->n_row, v_d(this->n_class, 0.)) ; - v_d prob_class(this->n_class, 0.) ; - double tot_sum = 0. ; - - // sample the prob - // beta distribution parameters - double alpha = pow(this->n_row, -0.5) ; - double beta = 1. ; - for(size_t i=0; in_row; i++) - { double row_sum = 0. 
; - for(size_t j=0; jn_class; j++) - { double x = rand_real_uniform(0., 1.0) ; - double p = std::max(EMEngine::p_min, beta_pmf(x, alpha, beta)) ; - prob[i][j] = p ; - prob_class[j] += p ; - tot_sum += p ; - row_sum += p ; - } - // normalize - for(size_t j=0; jn_class; j++) - { prob[i][j] /= row_sum ; } - } - - // class prob - for(auto& p : prob_class) - { p /= tot_sum ; } - - // compute the refererences - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_class; j++) - { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_slice; j_ref++, j_dat++) - { this->references[j][j_ref] += (this->data[i][j_dat] * prob[i][j]) ; } - } - } - // normalize - for(size_t i=0; in_class; i++) - { for(size_t j=0; jl_slice; j++) - { this->references[i][j] ; } - } - - // set the class probabilities to a uniform distribution - double sum = this->n_class * this->n_shift * this->n_flip ; - for(size_t i=0; in_class; i++) - { for(size_t j=0; jn_shift; j++) - { for(size_t k=0; kn_flip; k++) - { this->class_prob[i][j][k] = 1./sum ; } - } - } -} - -void EMEngine::seeding_sampling() -{ - // sample data to initialise the references - std::vector choosen(this->n_row, false) ; - - for(size_t i=0; in_class; ) - { size_t index = rand_int_uniform(size_t(0), size_t(this->n_row-1)) ; - // already choose - if(choosen[index]) - { ; } - // not yet choosen as reference - else - { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_slice; j_ref++, j_dat++) - { this->references[i][j_ref] = this->data[index][j_dat] ; } - choosen[index] = true ; - i++ ; - } - } - - // set the class probabilities to a uniform distribution - double sum = this->n_class * this->n_shift * this->n_flip ; - for(size_t i=0; in_class; i++) - { for(size_t j=0; jn_shift; j++) - { for(size_t k=0; kn_flip; k++) - { this->class_prob[i][j][k] = 1. 
/ sum ; - } - } - } -} - -void EMEngine::seeding_toy() -{ - // sample data to initialise the references - std::vector choosen(this->n_row, false) ; - - for(size_t i=0; in_class; ) - { size_t index = i ; - // already choose - if(choosen[index]) - { ; } - // not yet choosen as reference - else - { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_slice; j_ref++, j_dat++) - { this->references[i][j_ref] = this->data[index][j_dat] ; } - choosen[index] = true ; - i++ ; - } - } - - // set the class probabilities to a uniform distribution - double sum = this->n_class * this->n_shift * this->n_flip ; - for(size_t i=0; in_class; i++) - { for(size_t j=0; jn_shift; j++) - { for(size_t k=0; kn_flip; k++) - { this->class_prob[i][j][k] = 1./sum ; } - } - } -} - -void EMEngine::compute_window_means() -{ // compute the slices on which each thread will work - std::vector> slices = - ThreadPool::split_range(0, this->n_row, this->n_threads) ; - - // get promises and futures - // the function run by the threads will simply fill the promise with - // "true" to indicate that they are done - std::vector> promises(this->n_threads) ; - std::vector> futures(this->n_threads) ; - for(size_t i=0; in_threads; i++) - { futures[i] = promises[i].get_future() ; } - - // distribute work to threads - // -------------------------- threads start -------------------------- - for(size_t i=0; in_threads; i++) - { auto slice = slices[i] ; - this->threads.addJob(std::move( - std::bind(&EMEngine::compute_window_means_routine, - this, - slice.first, - slice.second, - std::ref(promises[i])))) ; - } - // wait until all threads are done working - for(auto& future : futures) - { future.get() ; } - // -------------------------- threads stop --------------------------- -} - -void EMEngine::compute_window_means_routine(size_t from, - size_t to, - std::promise& done) -{ - double l_slice = double(this->l_slice) ; - for(size_t i=from; in_shift; from++) - { double sum = 0. 
; - // slice is [from,to) - size_t to = from + this->l_slice ; - for(size_t j=from; jdata[i][j] ;} - this->window_mean[i][from] = sum / l_slice ; - } - } - done.set_value(true) ; -} - -void EMEngine::compute_loglikelihood() -{ - // compute the slices on which each thread will work - std::vector> slices = - ThreadPool::split_range(0, this->n_row, this->n_threads) ; - - // get promises and futures - // the function run by the threads will simply fill the promise with - // "true" to indicate that they are done - std::vector> promises(this->n_threads) ; - std::vector> futures(this->n_threads) ; - for(size_t i=0; in_threads; i++) - { futures[i] = promises[i].get_future() ; } - - // distribute work to threads - // -------------------------- threads start -------------------------- - for(size_t i=0; in_threads; i++) - { auto slice = slices[i] ; - this->threads.addJob(std::move( - std::bind(&EMEngine::compute_loglikelihood_routine, - this, - slice.first, - slice.second, - std::ref(promises[i])))) ; - } - // wait until all threads are done working - for(auto& future : futures) - { future.get() ; } - // -------------------------- threads stop --------------------------- -} - -void EMEngine::compute_loglikelihood_routine(size_t from, size_t to, std::promise& done) -{ - // access in writing - // this->loglikelihood -> only access the i-th which belong [from,to) - // this->loglikelihood_max -> only access the i-th which belong [from,to) - - for(size_t i=from; iloglikelihood_max[i] = std::numeric_limits::lowest() ; - - for(size_t j=0; jn_class; j++) - { for(size_t s_fw=0, s_rev=this->n_shift-1; - s_fwn_shift; s_fw++, s_rev--) - { // slice is [from_fw,to) - // from_dat_fw to_dat_fw [from_dat_fw, to_dat_fw] - // fw |---------->>>----------| - // ----------------------------------> data - // rev |----------<<<----------| [from_dat_rev, to_dat_rev] - // to_dat_rev can be -1 -> int - // to_dat_rev from_dat_rev - - // log likelihood - double ll_fw = 0. ; - double ll_rev = 0. 
; - // --------------- forward --------------- - size_t from_dat_fw = s_fw ; - size_t to_dat_fw = from_dat_fw + this->l_slice - 1 ; - // --------------- reverse --------------- - size_t from_dat_rev = this->n_col - 1 - s_fw ; - // size_t to_dat_rev = from_dat_rev - (this->l_slice - 1) ; - - for(size_t j_dat_fw=from_dat_fw,j_ref_fw=0, j_dat_rev=from_dat_rev; - j_dat_fwdata[i][j_dat_fw], - this->references[j][j_ref_fw]* - this->window_mean[i][s_fw])) ; - ll_fw += std::max(ll, EMEngine::p_min_log) ; - // --------------- reverse --------------- - if(this->flip) - { ll = log(poisson_pmf(this->data[i][j_dat_rev], - this->references[j][j_ref_fw]* - this->window_mean[i][s_rev])) ; - ll_rev += std::max(ll, EMEngine::p_min_log) ; - } - } - this->loglikelihood[i][j][from_dat_fw][flip_states::FORWARD] = ll_fw ; - // keep track of the max per row - if(ll_fw > this->loglikelihood_max[i]) - { this->loglikelihood_max[i] = ll_fw ; } - - if(this->flip) - { this->loglikelihood[i][j][from_dat_fw][flip_states::REVERSE] = ll_rev ; - // keep track of the max per row - if(ll_rev > this->loglikelihood_max[i]) - { this->loglikelihood_max[i] = ll_rev ; } - } - } - } - } - // fill the promise to indicate that the function exited - done.set_value(true) ; -} - -void EMEngine::compute_post_prob() -{ - // compute the slices on which each thread will work - std::vector> slices = - ThreadPool::split_range(0, this->n_row, this->n_threads) ; - - // get promises and futures - // the function run by the threads will compute - // the partial sum per class of post_prob for the given slice - // this should be used to compute the complete sum of post_prob - // and the complete sum per class of post_prob - std::vector> promises(this->n_threads) ; - std::vector> futures(this->n_threads) ; - for(size_t i=0; in_threads; i++) - { futures[i] = promises[i].get_future() ; } - - // distribute work to threads - // -------------------------- threads start -------------------------- - for(size_t i=0; in_threads; i++) 
- { auto slice = slices[i] ; - this->threads.addJob(std::move( - std::bind(&EMEngine::compute_post_prob_routine, - this, - slice.first, - slice.second, - std::ref(promises[i])))) ; - } - // wait until all threads are done working - // compute the sum of post prob and the per class sum of post prob - // from the partial results computed on each slice - this->post_prob_tot = 0. ; - this->post_prob_class = v_d(this->n_class, 0.) ; - for(auto& future : futures) - { auto probs = future.get() ; - for(size_t i=0; in_class; i++) - { double prob = probs[i] ; - this->post_prob_class[i] += prob ; - this->post_prob_tot += prob ; - } - } - // -------------------------- threads stop --------------------------- -} - -void EMEngine::compute_post_prob_routine(size_t from, - size_t to, - std::promise& done) -{ - // this->post_prob_row -> only access the i-th which belong [from,to) - // this->post_prob -> only access the i-th which belong [from,to) - - // some values that needs to be returned - // the total of the posterior prob for this slice of the data - // the total per class of posterior prob for this slice of the data - v_d post_prob_class(this->n_class, 0.) ; - - for(size_t i=from; ipost_prob_row[i] = 0. 
; - - for(size_t n_class=0; n_classn_class; n_class++) - { for(size_t n_shift=0; n_shiftn_shift; n_shift++) - { for(size_t n_flip=0; n_flipn_flip; n_flip++) - { /* - double p = exp(this->loglikelihood[i][n_class][n_shift][n_flip] - - this->loglikelihood_max[i]) * - this->class_prob[n_class][n_shift][n_flip] ; - */ - double p = std::max(exp(this->loglikelihood[i][n_class][n_shift][n_flip] - - this->loglikelihood_max[i]) * - this->class_prob[n_class][n_shift][n_flip], - EMEngine::p_min) ; - this->post_prob[i][n_class][n_shift][n_flip] = p ; - this->post_prob_row[i] += p ; - } - } - } - // normalize - for(size_t n_class=0; n_classn_class; n_class++) - { for(size_t n_shift=0; n_shiftn_shift; n_shift++) - { for(size_t n_flip=0; n_flipn_flip; n_flip++) - { this->post_prob[i][n_class][n_shift][n_flip] /= - this->post_prob_row[i] ; - double p = this->post_prob[i][n_class][n_shift][n_flip] ; - post_prob_class[n_class] += p ; - } - } - } - } - - done.set_value(post_prob_class) ; -} - -void EMEngine::compute_class_prob() -{ - for(size_t n_class=0; n_classn_class; n_class++) - { // reset total - this->class_prob_tot[n_class] = 0. ; - for(size_t n_shift=0; n_shiftn_shift; n_shift++) - { for(size_t flip=0; flipn_flip; flip++) - { // sum - this->class_prob[n_class][n_shift][flip] = 0. 
; - for(size_t i=0; in_row; i++) - { this->class_prob[n_class][n_shift][flip] += - this->post_prob[i][n_class][n_shift][flip] ; - } - // normalize - this->class_prob[n_class][n_shift][flip] /= this->post_prob_tot ; - this->class_prob_tot[n_class] += this->class_prob[n_class][n_shift][flip] ; - } - } - } -} - -void EMEngine::compute_references() -{ - // compute the slices on which each thread will work - std::vector> slices = - ThreadPool::split_range(0, this->n_row, this->n_threads) ; - - // get promises and futures - // the function run by the threads will compute - // the reference from the given slice - std::vector> promises(this->n_threads) ; - std::vector> futures(this->n_threads) ; - for(size_t i=0; in_threads; i++) - { futures[i] = promises[i].get_future() ; } - - // distribute work to threads - // -------------------------- threads start -------------------------- - for(size_t i=0; in_threads; i++) - { auto& slice = slices[i] ; - this->threads.addJob(std::move( - std::bind(&EMEngine::compute_references_routine, - this, - slice.first, - slice.second, - std::ref(promises[i])))) ; - } - // while threads are working, reset the references - for(size_t i=0; in_class; i++) - { for(size_t j=0; jl_slice; j++) - { this->references[i][j] = 0. 
; } - } - // wait until all threads are done working - // sum the partial class references to get the complete ones - for(size_t n=0; nn_threads; n++) - { matrix2d_d reference = futures[n].get() ; - for(size_t i=0; in_class; i++) - { for(size_t j=0; jl_slice; j++) - { this->references[i][j] += reference[i][j] ; } - } - } - // -------------------------- threads stop --------------------------- -} - -void EMEngine::compute_references_routine(size_t from, size_t to, std::promise& references) -{ // the empty references - matrix2d_d ref(this->n_class, v_d(this->l_slice, 0.)) ; - - for(size_t n_class=0; n_class < this->n_class; n_class++) - { - for(size_t i=from; in_shift; n_shift++) - { // --------------- forward --------------- - int from_dat_fw = n_shift ; - int to_dat_fw = from_dat_fw + this->l_slice - 1 ; - for(int j_dat_fw=from_dat_fw, j_ref_fw=0; - j_dat_fw<=to_dat_fw; j_dat_fw++, j_ref_fw++) - { ref[n_class][j_ref_fw] += - (this->post_prob[i][n_class][n_shift][flip_states::FORWARD] * this->data[i][j_dat_fw]) / - this->post_prob_class[n_class] ; - } - // --------------- reverse --------------- - if(this->flip) - { int from_dat_rev = this->n_col - 1 - n_shift ; - int to_dat_rev = from_dat_rev - (this->l_slice - 1) ; - for(int j_dat_rev=from_dat_rev, j_ref_fw=0; - j_dat_rev >= to_dat_rev; j_dat_rev--, j_ref_fw++) - { ref[n_class][j_ref_fw] += - (this->post_prob[i][n_class][n_shift][flip_states::REVERSE] * this->data[i][j_dat_rev]) / - this->post_prob_class[n_class] ; - } - } - } - } - } - references.set_value(ref) ; -} - -void EMEngine::center_shifts() -{ - if(this->n_shift == 1) - { return ; } - - // the possible shift states - std::vector shifts(this->n_shift) ; - std::iota(shifts.begin(), shifts.end(), 1.) ; - - // the shift probabilities and the class probabilies (no need to norm., class_prob sums to 1) - double shifts_prob_measured_tot = 0. 
; - std::vector shifts_prob_measured(this->n_shift) ; - for(size_t s=0; sn_shift; s++) - { for(size_t k=0; kn_class; k++) - { for(size_t f=0; fn_flip; f++) - { shifts_prob_measured[s] += this->class_prob[k][s][f] ; - shifts_prob_measured_tot += this->class_prob[k][s][f] ; - } - } - } - - - // the shift mean and (biased) standard deviation - double shifts_sd = sd(shifts, shifts_prob_measured, false) ; - - // the shift probabilities under the assumption that is distributed as a gaussian centered on - // the central shift state with sd and mean as in the data - // sd as the data - std::vector shifts_prob_centered(shifts.size(), 0.) ; - double shifts_prob_centered_tot = 0. ; - for(size_t i=0; in_shift/2)+1, shifts_sd) ; - shifts_prob_centered_tot += shifts_prob_centered[i] ; - } - - for(size_t k=0; kn_class; k++) - { for(size_t f=0; fn_flip; f++) - { for(size_t s=0; sn_shift; s++) - { this->class_prob[k][s][f] = this->class_prob_tot[k] * shifts_prob_centered[s] / - (this->n_flip * shifts_prob_centered_tot) ; - } - } - } - - // shifts_prob_measured_tot = 0. ; - shifts_prob_measured.clear() ; - shifts_prob_measured.resize(this->n_shift) ; - for(size_t s=0; sn_shift; s++) - { for(size_t k=0; kn_class; k++) - { for(size_t f=0; fn_flip; f++) - { shifts_prob_measured[s] += this->class_prob[k][s][f] ; - } - } - } -} - -const double EMEngine::p_min = 1e-100 ; -const double EMEngine::p_min_log = log(EMEngine::p_min) ; - -#include - -double sum_exp(const std::vector>& v) -{ - double result = 0. 
; - // double max = *std::max_element(lp.begin(), lp.end()) ; - - double max = std::numeric_limits::lowest() ; - for(const auto& i : v) - { if(i.first > max) - { max = i.first ; } - } - - // sum - for(const auto& i : v) - { result += (exp(i.first - max))*i.second ; } - result *= exp(max) ; - - return result ; -} diff --git a/src/Clustering.old/EMEngine.hpp b/src/Clustering.old/EMEngine.hpp deleted file mode 100644 index d4087cf..0000000 --- a/src/Clustering.old/EMEngine.hpp +++ /dev/null @@ -1,363 +0,0 @@ -#ifndef EMENGINE_HPP -#define EMENGINE_HPP - -#include -#include -#include -#include -#include -#include -#include // promise, future - -// some typdef -#include - - -/*! - * \brief This class implements the iterative expectation - * maximization classification procedure described in Nair - * et al. 2014, Bioinformatics. - * The classification procedure performs a probabilistic - * partitioning of genomic regions, based on the distribution - * of the reads over the regions. - * To mitigate a miss-alignment of the signal in the different - * regions - that is a same signal strech is present in two - * regions but at different offsets - the classification - * procedure can search protypic signals shorter than a whole - * region, at each possible offset over the region (named - * shift). - * To mitigate an inversion of the signal in the different regions - * - that is a same signal strech is present in two regions but in - * reverse orientation - the classification procedure can search - * protypic signals in both orientation. - */ -class EMEngine : public ClusteringEngine -{ - static const double p_min ; - static const double p_min_log ; - - public: - /*! - * \brief The possible seeding strategies. - */ - enum seeding_codes {RANDOM=0, SAMPLING, TOY} ; - - /*! - * \brief The possible flip states. - */ - enum flip_states{FORWARD=0, REVERSE} ; - - public: - /*! - * \brief Constructs an object. - * \param data the data to classify. 
- * \param n_class the number of signal classes to search. - * \param n_iter the number of iterations. - * \param n_shift the shifting freedom. 1 means no shift. - * \param flip whether flipping is allowed. - * \param n_threads the number of threads dedicated to the - * computations. - */ - EMEngine(const Matrix2D& data, - size_t n_class, - size_t n_iter, - size_t n_shift, - bool flip, - seeding_codes seeding, - const std::string& seed=std::string(""), - size_t n_threads=1) ; - - /*! - * \brief Destructor. - */ - virtual ~EMEngine() override ; - - /*! - * \brief Returns a matrix with the class class references - * (protypic signal), on each row. - * \return a matrix containing the class references, on - * each row. - */ - virtual Matrix2D get_references() const ; - - /*! - * \brief Returns a matrix with the posterior probabilies - * with the dimensions representing the data, classes, shifts - * and flips respectively. - * \return a matrix containing the posterior probabilities. - */ - virtual Matrix4D get_posterior_prob() const ; - - /*! - * \brief Returns the likelihood of the partition. - * \return the likelihood of the partition. - */ - virtual double get_loglikelihood() const ; - - /*! - * \brief Returns the Akaike Information Criterion (AIC) - * for the given partition. - * The AIC is 2n - 2LL where is the number of - * free parameters in the model and LL the log - * likelihood of the partition. - * \return the partition AIC. - */ - virtual double get_aic() const ; - - /*! - * \brief Runs the data clustering. - * \return - */ - virtual ClusteringEngine::exit_codes cluster() override ; - - protected: - /*! - * \brief Default constructor. - */ - EMEngine() = default ; - - /*! - * \brief Sets each class protypic signal to 1 count, - * in average. - */ - virtual void normalize_references() ; - - /*! - * \brief Initialises the references using the corresponding - * method. - * \param seeding the method to use. 
- */ - virtual void seeding(seeding_codes seeding) ; - - /*! - * \brief Initialises the references randomly. - * Generates the initial references by randomly assigning - * the data to the classes using a beta distribution and - * all classes are set equally likely. - */ - virtual void seeding_random() ; - - /*! - * \brief Initialises the K references by randomly - * sampling K rows in the data. The class are set - * equally probable. - */ - virtual void seeding_sampling() ; - - /*! - * \brief Initialises the K references using the first K - * rows in data. The class are set equally probable. - */ - virtual void seeding_toy() ; - - /*! - * \brief Computes the mean number of reads present in - * each slice (of length ncol - shift + 1), in each row - * of the data and store them in this->window_mean. - */ - virtual void compute_window_means() ; - - /*! - * \brief The routine that effectively computes the mean - * number of reads present in each slice, for the range - * [from,to) of rows in the data. - * This function is thread safe only as long as different - * [from,to) slices are given to the different threads. - * \param from the index of the first row to treat. - * \param to the index of the past last row to treat. - * \param done a promise filled when the function is done - * working. This allows to synchronize threads. - */ - virtual void compute_window_means_routine(size_t from, - size_t to, - std::promise& done) ; - - /*! - * \brief Computes the data log likelihood given the - * current class protypic signals. - */ - virtual void compute_loglikelihood() ; - - /*! - * \brief The routine that effectively computes the - * log likelihoods for the range [from,to) of rows - * in the data. This function is used to distribute - * the log likelihood computations over several threads. - * This function is thread safe only as long as - * different [from,to) slices are given to the different - * threads. - * \param from the index of the first row to treat. 
- * \param to the index of the past last row to treat. - * \param done a promise filled when the function is - * done working. This allows to synchronize threads. - */ - virtual void compute_loglikelihood_routine(size_t from, - size_t to, - std::promise& done) ; - - /*! - * \brief Computes the data posterior probabilties. - */ - virtual void compute_post_prob() ; - - /*! - * \brief The routine that effectively computes the - * posterior probabilities for the range [from,to) of - * rows in the data. This function is used to distribute - * the posterior probability computations over several - * threads. This function is thread safe only as long - * as different [from,to) slices are given to the - * differentthreads. - * \param from the index of the first row to treat. - * \param to the index of the past last row to treat. - * \param probs a promise containing a vector with the - * sum of the posterior probability, for each class, - * computed for the given slice. - */ - virtual void compute_post_prob_routine(size_t from, - size_t to, - std::promise& probs) ; - - /*! - * \brief Computes the class probabilities from the - * posterior probabilities. - */ - virtual void compute_class_prob() ; - - /*! - * \brief Computes the class aggregations given the - * posterior probabilities. - */ - virtual void compute_references() ; - - /*! - * \brief A routine that computes the partial class - * references for the range [from,to) of rows in the - * data. To obtain the full class references, it is - * required to 1) run this routine on the whole data - * at once or 2) run it on different slices and - * sum up the partial references obtained. This function - * is used to distribute the posterior probability - * computations over several threads. This function is - * thread safe only as long as different [from,to) slices - * are given to the different threads. - * \param from the index of the first row to treat. - * \param to the index of the past last row to treat. 
- * \param class_ref a promise containing a matrix with the - * partial class references on each row. - */ - virtual void compute_references_routine(size_t from, - size_t to, - std::promise& class_ref) ; - - /*! - * \brief Modifies the class probabilities in such a - * way that the shift probabilities are then normaly - * distributed, centered on the middle shift state. - * However, the overall class probabilities remain - * unchanged. - */ - virtual void center_shifts() ; - - protected: - /*! - * \brief whether flip is enabled. - */ - bool flip ; - /*! - * \brief the number of iterations. - */ - size_t n_iter ; - /*! - * \brief the number of shift states. - */ - size_t n_shift ; - /*! - * \brief the number of flip states. - */ - size_t n_flip ; - /*! - * \brief the number of classes. - */ - size_t n_class ; - - /*! - * \brief the data. - */ - matrix2d_i data ; - /*! - * \brief the mean number of reads per window in the - * data. - */ - matrix2d_d window_mean ; - /*! - * \brief the class aggregation signal. - */ - matrix2d_d references ; - /*! - * \brief the log likelihoods. - */ - matrix4d_d loglikelihood ; - /*! - * \brief the max log likelihood value for each row. - */ - v_d loglikelihood_max ; - /*! - * \brief the posterior probabilities. - */ - matrix4d_d post_prob ; - /*! - * \brief the class probabilities. - */ - matrix3d_d class_prob ; - /*! - * \brief the total prob per class. - */ - v_d class_prob_tot ; - - /*! - * \brief the sum per row of post_prob. - */ - v_d post_prob_row ; - /*! - * \brief the sum per class of post_prob. - */ - v_d post_prob_class ; - /*! - * \brief the total of post_prob. - */ - double post_prob_tot ; - - /*! - * \brief the number of rows in data. - */ - size_t n_row ; - /*! - * \brief the number of columns in data. - */ - size_t n_col ; - /*! - * \brief the size of the pattern search and of - * the scanning window in the data. - */ - size_t l_slice ; - - /*! - * \brief the seeding method to use. 
- */ - EMEngine::seeding_codes seeding_method ; - - /*! - * \brief the number of threads. - */ - size_t n_threads ; - /*! - * \brief the threads. - */ - ThreadPool threads ; -} ; - - -double sum_exp(const std::vector>& v) ; - -#endif // EMENGINE_HPP diff --git a/src/Clustering.old/ReferenceComputer.cpp b/src/Clustering.old/ReferenceComputer.cpp deleted file mode 100644 index 352da39..0000000 --- a/src/Clustering.old/ReferenceComputer.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include - -#include -#include - -// some typdef -#include - - -ReferenceComputer::ReferenceComputer(const Matrix2D& data, - const Matrix4D& posterior_prob, - size_t n_threads) - : EMEngine(data, - posterior_prob.get_dim()[1], - 1, - posterior_prob.get_dim()[2], - posterior_prob.get_dim()[3] == 2, - EMEngine::seeding_codes::RANDOM, - "", - n_threads) -{ - // copy the data - this->data = matrix2d_i(this->n_row, v_i(this->n_col)) ; - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_col; j++) - { this->data[i][j] = data(i,j) ; } - } - - // compute window means - this->window_mean = matrix2d_d(this->n_row, v_d(this->n_shift, 0.)) ; - this->compute_window_means() ; - - // initialise, copy and compute probs - this->post_prob = matrix4d_d(this->n_row, - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - v_d(this->n_flip, 0.)))) ; - this->class_prob = matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - v_d(this->n_flip, 0.))) ; - this->class_prob_tot = v_d(this->n_class, 0.) ; - this->post_prob_class = v_d(this->n_class, 0.) 
; - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_class; j++) - { for(size_t s=0; sn_shift; s++) - { for(size_t f=0; fn_flip; f++) - { double p = posterior_prob(i,j,s,f) ; - this->post_prob[i][j][s][f] = p ; - this->post_prob_class[j] += p ; - this->post_prob_tot += p ; - } - } - } - } - this->compute_class_prob() ; - - // compute the references - this->references = matrix2d_d(this->n_class, - v_d(this->l_slice, 0.)) ; - this->compute_references() ; -} - -ReferenceComputer::~ReferenceComputer() -{ ; } - -Matrix2D ReferenceComputer::get_references() const -{ - // add a 1st column with the class probabilities - Matrix2D references(this->n_class, this->l_slice+1, 0.) ; - for(size_t i=0; in_class; i++) - { // class prob - references(i,0) = this->class_prob_tot[i] ; - // signal - for(size_t j=0; jl_slice; j++) - { references(i,j+1) = this->references[i][j] ; } - } - return references ; -} - diff --git a/src/Clustering.old/ReferenceComputer.hpp b/src/Clustering.old/ReferenceComputer.hpp deleted file mode 100644 index bfaaa85..0000000 --- a/src/Clustering.old/ReferenceComputer.hpp +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef REFERENCECOMPUTER_HPP -#define REFERENCECOMPUTER_HPP - -#include - -#include -#include - -/*! - * \brief The ReferenceComputer class is a wrapper around the - * EMEngine class that allows to compute the class references - * given the posterior probability matrix and the data without - * having to re-run the data classification. - * - * This class is typically made to be used in conjunction with - * an EMEngine instance, using the following pattern : - * - * Matrix2D data = ... ; - * EMEngine em(data, ...) ; - * em.cluster() ; - * auto prob = em.get_posterior_prob() ; - * auto obj = ReferenceComputer(data, prob, ...) ; - * auto ll = obj.get_loglikelihood() ; - * auto ref = obj.get_references() ; - */ -class ReferenceComputer : public EMEngine -{ - public: - - ReferenceComputer() = delete ; - - /*! 
- * \brief Constructs an obect and computes the references. - * \param the data for which the classification probabilities - * have been generated. - * \param the classification probabilities for the given data, as - * return by an EMEngine instance (see above). - * \param n_threads the number of threads dedicated to the - * computations. - */ - ReferenceComputer(const Matrix2D& data, - const Matrix4D& posterior_prob, - size_t n_threads) ; - - /*! - * \brief Destructor. - */ - virtual ~ReferenceComputer() override ; - - /*! - * \brief Returns a matrix with the class class references - * (protypic signal), on each row. - * The 1st column contains the class probability, the - * following ones the class signal. - * \return a matrix containing the class references and their - * probabalities, on each row. - */ - virtual Matrix2D get_references() const override ; - - // removes the following methods from the public interface to restrict it - private: - using EMEngine::cluster ; - -} ; - - -#endif // REFERENCECOMPUTER_HPP diff --git a/src/Clustering.old/typedef.hpp b/src/Clustering.old/typedef.hpp deleted file mode 100644 index 4d3e91a..0000000 --- a/src/Clustering.old/typedef.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef TYPEDEFCLUSTERING_HPP -#define TYPEDEFCLUSTERING_HPP - -#include // std::vector -#include // std::pair - -typedef std::vector v_i ; -typedef std::vector v_d ; -typedef std::vector matrix2d_i ; -typedef std::vector matrix2d_d ; -typedef std::vector matrix3d_d ; -typedef std::vector matrix4d_d ; - -typedef std::vector> v_pair ; - -#endif // TYPEDEFCLUSTERING_HPP diff --git a/src/Clustering/DataLayer.cpp b/src/Clustering/DataLayer.cpp index 21340b2..e8ea814 100644 --- a/src/Clustering/DataLayer.cpp +++ b/src/Clustering/DataLayer.cpp @@ -1,142 +1,144 @@ #include #include // std::invalid_argument #include // log() -#include +#include +#include +#include #include DataLayer::DataLayer() {} -DataLayer::DataLayer(const matrix2d_i& data, +DataLayer::DataLayer(const 
Matrix2D& data, size_t n_class, size_t n_shift, bool flip) :data(data), flip(flip), - n_row(data.size()), - n_col(data[0].size()), + n_row(data.get_nrow()), + n_col(data.get_ncol()), n_class(n_class), l_model(n_col - n_shift + 1), n_shift(n_shift), n_flip(flip + 1) { // models cannot be initialise here // as the number of categories depend // on the exact class } -DataLayer::DataLayer(const matrix2d_i& data, - const matrix3d_d& model, +DataLayer::DataLayer(const Matrix2D& data, + const Matrix3D& model, bool flip) : data(data), model(model), flip(flip), - n_row(data.size()), - n_col(data[0].size()), - n_class(model.size()), - l_model(model[0].size()), - n_category(model[0][0].size()), + n_row(data.get_nrow()), + n_col(data.get_ncol()), + n_class(model.get_dim()[0]), + l_model(model.get_dim()[1]), + n_category(model.get_dim()[2]), n_shift(n_col - l_model + 1), n_flip(flip + 1) { // check if model is not too long if(this->n_col < this->l_model) { char msg[4096] ; sprintf(msg, "Error! model is longer than data : %zu / %zu", this->l_model, this->n_col) ; throw std::invalid_argument(msg) ; } this->n_shift = this->n_col - this->l_model + 1 ; } DataLayer::~DataLayer() {} -matrix3d_d DataLayer::get_model() const +Matrix3D DataLayer::get_model() const { return this->model ; } -void DataLayer::check_loglikelihood_dim(const matrix4d_d& loglikelihood) const -{ if(loglikelihood.size() != this->n_row) +void DataLayer::check_loglikelihood_dim(const Matrix4D& loglikelihood) const +{ if(loglikelihood.get_dim()[0] != this->n_row) { char msg[4096] ; sprintf(msg, "Error! loglikelihood matrix 1st dimension is not " "equal to data row number : %zu / %zu", - loglikelihood.size(), this->n_row) ; + loglikelihood.get_dim()[0], this->n_row) ; throw std::invalid_argument(msg) ; } - else if(loglikelihood[0].size() != this->n_class) + else if(loglikelihood.get_dim()[1] != this->n_class) { char msg[4096] ; sprintf(msg, "Error! 
loglikelihood matrix 2nd dimension is not " "equal to model class number : %zu / %zu", - loglikelihood[0].size(), this->n_class) ; + loglikelihood.get_dim()[1], this->n_class) ; throw std::invalid_argument(msg) ; } - else if(loglikelihood[0][0].size() != this->n_shift) + else if(loglikelihood.get_dim()[2] != this->n_shift) { char msg[4096] ; sprintf(msg, "Error! loglikelihood matrix 3rd dimension is not " "equal to model shift state number : %zu / %zu", - loglikelihood[0][0].size(), this->n_shift) ; + loglikelihood.get_dim()[2], this->n_shift) ; throw std::invalid_argument(msg) ; } - else if(loglikelihood[0][0][0].size() != this->n_flip) + else if(loglikelihood.get_dim()[3] != this->n_flip) { char msg[4096] ; sprintf(msg, "Error! loglikelihood matrix 4th dimension is not " "equal to model flip state number : %zu / %zu", - loglikelihood[0][0][0].size(), this->n_flip) ; + loglikelihood.get_dim()[3], this->n_flip) ; throw std::invalid_argument(msg) ; } } void DataLayer::check_loglikelihood_max_dim(const vector_d& loglikelihood_max) const { if(loglikelihood_max.size() != this->n_row) { char msg[4096] ; sprintf(msg, "Error! loglikelihood_max length is not " "equal to data row number : %zu / %zu", loglikelihood_max.size(), this->n_flip) ; throw std::invalid_argument(msg) ; } } -void DataLayer::check_posterior_prob_dim(const matrix4d_d& posterior_prob) const -{ if(posterior_prob.size() != this->n_row) +void DataLayer::check_posterior_prob_dim(const Matrix4D& posterior_prob) const +{ if(posterior_prob.get_dim()[0] != this->n_row) { char msg[4096] ; sprintf(msg, "Error! posterior_prob matrix 1st dimension is not " "equal to data row number : %zu / %zu", - posterior_prob.size(), this->n_row) ; + posterior_prob.get_dim()[0], this->n_row) ; throw std::invalid_argument(msg) ; } - else if(posterior_prob[0].size() != this->n_class) + else if(posterior_prob.get_dim()[1] != this->n_class) { char msg[4096] ; sprintf(msg, "Error! 
posterior_prob matrix 2nd dimension is not " "equal to model class number : %zu / %zu", - posterior_prob[0].size(), this->n_class) ; + posterior_prob.get_dim()[1], this->n_class) ; throw std::invalid_argument(msg) ; } - else if(posterior_prob[0][0].size() != this->n_shift) + else if(posterior_prob.get_dim()[2] != this->n_shift) { char msg[4096] ; sprintf(msg, "Error! posterior_prob matrix 3rd dimension is not " "equal to model shift state number : %zu / %zu", - posterior_prob[0][0].size(), this->n_shift) ; + posterior_prob.get_dim()[2], this->n_shift) ; throw std::invalid_argument(msg) ; } - else if(posterior_prob[0][0][0].size() != this->n_flip) + else if(posterior_prob.get_dim()[3] != this->n_flip) { char msg[4096] ; sprintf(msg, "Error! posterior_prob matrix 4th dimension is not " "equal to model flip state number : %zu / %zu", - posterior_prob[0][0][0].size(), this->n_flip) ; + posterior_prob.get_dim()[3], this->n_flip) ; throw std::invalid_argument(msg) ; } } const double DataLayer::p_min = 1e-100 ; const double DataLayer::p_min_log = log(DataLayer::p_min) ; diff --git a/src/Clustering/DataLayer.hpp b/src/Clustering/DataLayer.hpp index cde2156..a2bb6a4 100644 --- a/src/Clustering/DataLayer.hpp +++ b/src/Clustering/DataLayer.hpp @@ -1,239 +1,224 @@ #ifndef DATALAYER_HPP #define DATALAYER_HPP #include #include // std::promise, std::future -#include +#include +#include +#include #include +typedef std::vector vector_d ; + /*! * \brief The DataLayer class define the basic design * to handle probabilistic models together with * their data. * A DataLayer is made of two parts : * 1) a data matrix * 2) a model * The model contains the parameters of a probabilistic * model with one or more classes that fits the data. * The data likelihood given the model can be computed * and the model can be updated given a set of * posterior probabilities representing the data * assignments to the different classes. */ class DataLayer { public: /*! 
* \brief the smallest acceptable probability * for computations. */ static const double p_min ; /*! * \brief the log of the smallest probability. */ static const double p_min_log ; /*! * \brief The possible flip states. */ enum flip_states{FORWARD=0, REVERSE} ; /*! * \brief Default constructor. */ DataLayer() ; /*! * \brief Constructs an object with the * given data. * An empty model is not initialised yet * as the model number of categories * depends on the final class. * \param data the data. * \param n_class the number of classes * of the model. * \param n_shift the number of shift * states of the model. * \param flip whether flipping is allowed. */ - DataLayer(const matrix2d_i& data, + DataLayer(const Matrix2D& data, size_t n_class, size_t n_shift, bool flip) ; /*! * \brief Constructs an object with the * given data and model. * The model dimensions set the number of * classes and the shifting freedom. * \param data the data. * \param the model. * \param flip whether flipping is allowed. */ - DataLayer(const matrix2d_i& data, - const matrix3d_d& model, + DataLayer(const Matrix2D& data, + const Matrix3D& model, bool flip) ; /*! * \brief Destructor. */ virtual ~DataLayer() ; - /*! - * \brief Sets the model values randomly. - */ - virtual void seed_model_randomly() = 0 ; - - /*! - * \brief Sets the model values by - * sampling rows in the data and - * assigning them as initial model - * values. - */ - virtual void seed_model_sampling() = 0 ; - - /*! - * \brief Sets the model values by - * using the first n_class rows in data. - */ - virtual void seed_model_toy() = 0 ; - /*! * \brief Computes the log likelihood of the data * given the current model parameters. * \param loglikelihood a matrix to store the * results. 
It should have the following dimensions : * 1st : same as the data number of row * 2nd : same as the model number of classes * 3rd : same as the number of shifts * 4th : same as the number of flip states * \param loglikelihood_max a vector containing the * max value for each row of log_likelihood. * Its length should be equal to the data row number. * \param threads a pointer to a thread pool to * parallelize the computations. If nullptr is given, * the computations are performed by the main thread. */ - virtual void compute_loglikelihoods(matrix4d_d& loglikelihood, + virtual void compute_loglikelihoods(Matrix4D& loglikelihood, vector_d& loglikelihood_max, ThreadPool* threads=nullptr) const = 0 ; /*! * \brief Updates the model given the posterior * probabilities (the probabilities of each row * in the data to be assigned to each class, * for each shift and flip state). * \param posterior_prob the data assignment probabilities to * the different classes. * \param threads a pointer to a thread pool to * parallelize the computations. If nullptr is given, * the computations are performed by the main thread. */ - virtual void update_model(const matrix4d_d& posterior_prob, + virtual void update_model(const Matrix4D& posterior_prob, ThreadPool* threads=nullptr) = 0 ; /*! * \brief Returns a copy of the current model. * \return the current model. * 1st dim : the number of classes * 2nd dim : the model length * 3rd dim : the number of value categories. */ - virtual matrix3d_d get_model() const ; + virtual Matrix3D get_model() const ; protected: /*! * \brief Checks the argument has compatible * dimensions with the data and models. If this is * not the case, throw a std::invalid_argument with * a relevant message. * \param logliklihood a matrix to store the * results. 
It should have the following dimensions : * 1st : same as the data row number * 2nd : same as the model class number * 3rd : same as the shift state number * 4th : same as the flip state number * \throw std::invalid_argument if the dimensions are * incorrect. */ - void check_loglikelihood_dim(const matrix4d_d& loglikelihood) const ; + void check_loglikelihood_dim(const Matrix4D& loglikelihood) const ; /*! * \brief Checks that the argument has compatible * dimensions with the data and models. If this is * not the case, throw a std::invalid_argument with * a relevant message. * \param loglikelihood_max a vector containing the * max value for each row of log_likelihood. * It should have a length equal to the number of * the data row number. * \throw std::invalid_argument if the dimensions are * incorrect. */ void check_loglikelihood_max_dim(const vector_d& loglikelihood_max) const ; /*! * \brief Checks the argument has compatible * dimensions with the data and models. If this is * not the case, throw a std::invalid_argument with * a relevant message. * \param posterior_prob a matrix to store the * results. It should have the following dimensions : * 1st : same as the data row number * 2nd : same as the model class number * 3rd : same as the shift state number * 4th : same as the flip state number * \throw std::invalid_argument if the dimensions are * incorrect. */ - void check_posterior_prob_dim(const matrix4d_d& posterior_prob) const ; + void check_posterior_prob_dim(const Matrix4D& posterior_prob) const ; /*! * \brief the data. */ - matrix2d_i data ; + Matrix2D data ; /*! * \brief the data model. */ - matrix3d_d model ; + Matrix3D model ; /*! * \brief whether flip is enabled. */ bool flip ; /*! * \brief the number of row in the data. */ size_t n_row ; /*! * \brief the number of columns in the data. */ size_t n_col ; /*! * \brief the number of classes in the model. */ size_t n_class ; /*! * \brief the model length, its 2nd dimension. */ size_t l_model ; /*! 
* \brief the number of variable categories in * the data. This is also the model 3rd * dimension. * Read counts are quantitative values and * have a number of categories equal to one * whereas as DNA sequences are made of * A,C,G,T (at least) and have 4 different * categories. */ size_t n_category ; /*! * \brief the number of shift states. */ size_t n_shift ; /*! * \brief the number of flip states. */ size_t n_flip ; } ; #endif // DATALAYER_HPP diff --git a/src/Clustering/EMBase.cpp b/src/Clustering/EMBase.cpp new file mode 100644 index 0000000..3a45de9 --- /dev/null +++ b/src/Clustering/EMBase.cpp @@ -0,0 +1,298 @@ +#include + +#include +#include // std::invalid_argument +#include // std::promise, std::future +#include // std::pair, std::move() +#include // std::bind(), std::ref() +#include // std::iota() +#include // std::mt19937 + +#include +#include +#include +#include // beta_distribution() +#include // rand_string() +#include // getRandomNumberGenerator() +#include // sd(), normal_pmf() + + +EMBase::EMBase(size_t n_row, + size_t n_col, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + size_t n_threads=0) + : n_row(n_row), + n_col(n_col), + n_class(n_class), + n_shift(n_shift), + flip(flip), + n_flip(flip+1), + n_iter(n_iter), + l_model(n_col - n_shift + 1), + post_prob_tot(0.), + threads(nullptr) +{ // check n_shift value + if(this->n_col < this->n_shift) + { char msg[4096] ; + sprintf(msg, "Error! Shift is bigger than data column number " + "(%zu / %zu)!", + this->n_shift, this->n_col) ; + throw std::invalid_argument(msg) ; + } + + // data structures + this->loglikelihood = Matrix4D(this->n_row, + this->n_class, + this->n_shift, + this->n_flip, + 0.) ; + this->post_prob = Matrix4D(this->n_row, + this->n_class, + this->n_shift, + this->n_flip, + 0.) ; + this->post_state_prob = Matrix3D(this->n_class, + this->n_shift, + this->n_flip, + 0.) 
; + this->post_class_prob = vector_d(this->n_class, 0) ; + this->post_prob_rowsum = vector_d(this->n_row, 0) ; + this->post_prob_colsum = vector_d(this->n_class, 0) ; + this->post_prob_tot = 0 ; + // threads + if(n_threads) + { this->threads = new ThreadPool(n_threads) ; } + +} + +EMBase::~EMBase() +{ // threads + if(this->threads != nullptr) + { this->threads->join() ; + delete this->threads ; + this->threads = nullptr ; + } +} + +Matrix4D EMBase::get_post_prob() const +{ return this->post_prob ; } + +vector_d EMBase::get_post_class_prob() const +{ return this->post_class_prob ; } + +void EMBase::set_state_prob_uniform() +{ double sum = this->n_class * this->n_shift * this->n_flip ; + for(size_t i=0; in_class; i++) + { for(size_t j=0; jn_shift; j++) + { for(size_t k=0; kn_flip; k++) + { this->post_state_prob(i,j,k) = 1./sum ; } + } + } +} + +void EMBase::set_post_prob_random(const std::string& seed) +{ // set random number generator + // will be used to generate thread private seeds + getRandomGenerator(seed) ; + + // don't parallelize + if(this->threads == nullptr) + { std::promise promise ; + std::future future = promise.get_future() ; + this->set_post_prob_random_routine(0, this->n_row, seed, promise) ; + // compute the sum of post prob and the per class sum of post prob + // from the partial results computed on each slice + this->post_prob_tot = 0. 
; + this->post_prob_colsum = future.get() ; + for(const auto& prob : this->post_prob_colsum) + { this->post_prob_tot += prob ; } + } + // parallelize + else + { size_t n_threads = this->threads->getNThread() ; + + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row,n_threads) ; + + // get promises and futures + // the function run by the threads will compute + // the partial sum per class of post_prob for the given slice + // this should be used to compute the complete sum of post_prob + // and the complete sum per class of post_prob + std::vector> promises(n_threads) ; + std::vector> futures(n_threads) ; + // private seeds + std::vector private_seeds(n_threads) ; + for(size_t i=0; ithreads->addJob(std::move( + std::bind(&EMBase::set_post_prob_random_routine, + this, + slice.first, + slice.second, + private_seeds[i], + std::ref(promises[i])))) ; + } + // wait until all threads are done working + // compute the sum of post prob and the per class sum of post prob + // from the partial results computed on each slice + this->post_prob_tot = 0. ; + this->post_prob_colsum = vector_d(this->n_class, 0.) ; + for(auto& future : futures) + { auto probs = future.get() ; + for(size_t i=0; in_class; i++) + { double prob = probs[i] ; + this->post_prob_colsum[i] += prob ; + this->post_prob_tot += prob ; + } + } + // -------------------------- threads stop --------------------------- + } + + // compute class and state probs + this->compute_class_prob() ; +} + +void EMBase::set_post_prob_random_routine(size_t from, + size_t to, + const std::string& seed, + std::promise& post_prob_colsum) +{ // random number generator + std::mt19937 generator ; + std::seed_seq seed_sequence(seed.begin(),seed.end()) ; + generator.seed(seed_sequence) ; + + // this->post_prob_tot = 0. ; + // this->post_prob_colsum = vector_d(this->n_class, 0.) ; + vector_d colsums = vector_d(this->n_class, 0.) 
; + + vector_d rowsums(this->n_row, 0) ; + + // random sampling + beta_distribution beta(1, this->n_row) ; + for(size_t i=from; in_class; j++) + { for(size_t k=0; kn_shift; k++) + { for(size_t l=0; ln_flip; l++) + { double p = beta(generator) ; + this->post_prob(i,j,k,l) = p ; + rowsums[i] += p ; + } + } + } + } + + // normalization + for(size_t i=from; in_class; j++) + { for(size_t k=0; kn_shift; k++) + { for(size_t l=0; ln_flip; l++) + { double p = this->post_prob(i,j,k,l) / rowsums[i] ; + this->post_prob(i,j,k,l) = p ; + // this->post_prob_tot += p ; + // this->post_prob_colsum[j] += p ; + colsums[j] += p ; + } + } + } + } + + // compute class and state probs + // this->compute_class_prob() ; + post_prob_colsum.set_value(colsums) ; +} + +void EMBase::compute_class_prob() +{ + for(size_t n_class=0; n_classn_class; n_class++) + { // reset total + this->post_class_prob[n_class] = 0. ; + for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t flip=0; flipn_flip; flip++) + { // sum + this->post_state_prob(n_class,n_shift,flip) = 0. ; + for(size_t i=0; in_row; i++) + { this->post_state_prob(n_class,n_shift,flip) += + this->post_prob(i,n_class,n_shift,flip) ; + } + // normalize + this->post_state_prob(n_class,n_shift,flip) /= this->post_prob_tot ; + this->post_class_prob[n_class] += this->post_state_prob(n_class,n_shift,flip) ; + } + } + } +} + +void EMBase::center_post_state_prob() +{ + if(this->n_shift == 1) + { return ; } + + // the possible shift states + vector_d shifts(this->n_shift) ; + std::iota(shifts.begin(), shifts.end(), 1.) ; + + // the shift probabilities and the class probabilies + // (no need to norm., class_prob sums to 1) + double shifts_prob_measured_tot = 0. 
; + vector_d shifts_prob_measured(this->n_shift) ; + for(size_t s=0; sn_shift; s++) + { for(size_t k=0; kn_class; k++) + { for(size_t f=0; fn_flip; f++) + { shifts_prob_measured[s] += this->post_state_prob(k,s,f) ; + shifts_prob_measured_tot += this->post_state_prob(k,s,f) ; + } + } + } + + + // the shift mean and (biased) standard deviation + double shifts_sd = sd(shifts, shifts_prob_measured, false) ; + + // the shift probabilities under the assumption that is + // distributed as a gaussian centered on + // the central shift state with sd and mean as in the data + // sd as the data + vector_d shifts_prob_centered(shifts.size(), 0.) ; + double shifts_prob_centered_tot = 0. ; + for(size_t i=0; in_shift/2)+1, shifts_sd) ; + shifts_prob_centered_tot += shifts_prob_centered[i] ; + } + + for(size_t k=0; kn_class; k++) + { for(size_t f=0; fn_flip; f++) + { for(size_t s=0; sn_shift; s++) + { this->post_state_prob(k,s,f) = this->post_class_prob[k] * + shifts_prob_centered[s] / + (this->n_flip * shifts_prob_centered_tot) ; + } + } + } + + // shifts_prob_measured_tot = 0. ; + shifts_prob_measured.clear() ; + shifts_prob_measured.resize(this->n_shift) ; + for(size_t s=0; sn_shift; s++) + { for(size_t k=0; kn_class; k++) + { for(size_t f=0; fn_flip; f++) + { shifts_prob_measured[s] += + this->post_state_prob(k,s,f) ; + } + } + } +} diff --git a/src/Clustering/EMBase.hpp b/src/Clustering/EMBase.hpp new file mode 100644 index 0000000..fb0f00c --- /dev/null +++ b/src/Clustering/EMBase.hpp @@ -0,0 +1,240 @@ +#ifndef EMBASE_HPP +#define EMBASE_HPP + +#include +#include +#include // std::promise + +#include +#include +#include +#include + + +typedef std::vector vector_d ; + + +/*! + * \brief The EMBase class is a base class + * providing the basic support for classes + * in implementing read density, sequence + * and both at the time classification + * procedures. + */ +class EMBase +{ public: + /*! + * \brief The possible exit codes for the classification + * method. 
+ * 0 the classification procedure converged, 1 the + * classification procedure ended by reaching the maximum + * number of iterations, 2 the classification procedure + * encountered an error. + */ + enum exit_codes {CONVERGENCE=0, ITER_MAX, FAILURE} ; + + public: + /*! + * \brief Constructs an EMBase object. + * \param n_row the number of rows in the data matrix. + * \param n_col the number of columns in the data matrix. + * \param n_iter the number of optimization iterations. + * \param n_shift the number of shift states allowed. + * \param flip whether flipping is allowed. + * \param n_threads the number of parallel threads + * to run the computations. 0 means no parallel + * computing, everything is run on the main thread. + * \throw std::invalid_argument if the shifting freedom + * is bigger than the number of columns. + */ + EMBase(size_t n_row, + size_t n_col, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + size_t n_threads) ; + + EMBase(const EMBase& other) = delete ; + + /*! + * \brief Destructor. + */ + virtual ~EMBase() ; + + /*! + * \brief Returns the posterior probability + * of each point belonging to each class, for + * each possible shift and flip state. + * \return the posterior probability matrix, + * with the following dimensions : + * 1st dim : the data points + * 2nd dim : the classes + * 3rd dim : the shift states + * 4th dim : the flip states + */ + virtual Matrix4D get_post_prob() const ; + + /*! + * \brief Returns the posterior class + * probabilities (the total class + * probability over all shift and + * flip states). + * \return the posterior class + * probabilities. + */ + virtual vector_d get_post_class_prob() const ; + + /*! + * \brief Runs the models optimization and the + * data classification. + * \return a code indicating how the optimization + * ended. + */ + virtual EMBase::exit_codes classify() = 0 ; + + protected: + + /*! 
+ * \brief Computes the data log likelihood given the + * current models, likelihood for each state. + */ + virtual void compute_loglikelihood() = 0 ; + + /*! + * \brief Computes the data posterior probabilties. + */ + virtual void compute_post_prob() = 0 ; + + /*! + * \brief Update the data models for all layers, given + * the current posterior and class probabilities. + */ + virtual void update_models() = 0; + + /*! + * \brief Sets all the state probabilities + * (all shift and flip states in all classes) + * to a uniform probability. + */ + void set_state_prob_uniform() ; + + /*! + * \brief Sets the posterior + * probabilities randomly (by + * sampling them from a beta + * distribution) and update all + * other probabilities accordingly. + * \param seed a seed to set the initial + * state of the random number generator. + */ + void set_post_prob_random(const std::string& seed) ; + + /*! + * \brief The routine that effectively + * sets the posterior probabilities randomly + * (by sampling them from a beta + * distribution). + * \param from the index of the first row + * in the data to consider. + * \param to the index of the past last row + * in the data to consider. + * \param done the partial column (over the classes) + * sum of posterior probabilities. If several routines + * are running together, the colsums are retrieved by + * summing up the vectors together. + * \param seed a seed to set the initial + * state of the random number generator. + */ + void set_post_prob_random_routine(size_t from, + size_t to, + const std::string& seed, + std::promise& post_prob_colsum) ; + + /*! + * \brief Computes the class/state probabilities from the + * posterior probabilities. + */ + void compute_class_prob() ; + + /*! + * \brief Modifies the state probabilities in such a + * way that the state probabilities are then normaly + * distributed, centered on the middle shift state. + * However, the overall class probabilities remain + * unchanged. 
+ */ + void center_post_state_prob() ; + + /*! + * \brief the number of rows in data. + */ + size_t n_row ; + /*! + * \brief the number of columns in data. + */ + size_t n_col ; + /*! + * \brief the number of classes. + */ + size_t n_class ; + /*! + * \brief the number of shift states. + */ + size_t n_shift ; + /*! + * \brief whther flip is allowed. + */ + bool flip ; + /*! + * \brief zhe number of flip states. + */ + size_t n_flip ; + /*! + * \brief the number of iterations. + */ + size_t n_iter ; + /*! + * \brief the length of the models. + */ + size_t l_model ; + + /*! + * \brief the joint loglikelihood for each data point, + * for each state (each class for each + * shift and flip state). + */ + Matrix4D loglikelihood ; + /*! + * \brief the posterior probabilities. + */ + Matrix4D post_prob ; + /*! + * \brief the states (shift and flip in each class) + * probabilities. + */ + Matrix3D post_state_prob ; + /*! + * \brief the total prob per class. + */ + vector_d post_class_prob ; + /*! + * \brief the sum per row (data point) of post_prob. + */ + vector_d post_prob_rowsum ; + /*! + * \brief the sum per column (class) of post_prob. + */ + vector_d post_prob_colsum ; + /*! + * \brief the total of post_prob. + */ + double post_prob_tot ; + /*! + * \brief the threads. 
+ */ + ThreadPool* threads ; +} ; + + +#endif // EMBASE_HPP diff --git a/src/Clustering/EMEngine.cpp b/src/Clustering/EMEngine.cpp deleted file mode 100644 index dc9bd11..0000000 --- a/src/Clustering/EMEngine.cpp +++ /dev/null @@ -1,586 +0,0 @@ -#include -#include // log(), exp(), pow() -#include // std::promise, std::future -#include // std::pair, std::move() -#include // std::bind(), std::ref() - -#include // rand_int_uniform() -#include // getRandomNumberGenerator() -#include // beta_distribution() -#include // poisson_pmf(), normal_pmf(), sd() -#include // ConsoleProgressBar -#include - - -EMEngine::EMEngine(const std::vector& read_matrices, - const std::vector& seq_matrices, - size_t n_class, - size_t n_iter, - size_t n_shift, - bool flip, - EMEngine::seeding_codes seeding, - const std::string& seed, - size_t n_threads) - : read_layer_list(), - sequence_layer_list(), - threads(nullptr) - -{ std::cerr << "EMEngine::EMEngine START" << std::endl ; - // nb of layers - size_t n_layer_read = read_matrices.size() ; - size_t n_layer_seq = seq_matrices.size() ; - this->n_layer = n_layer_read + n_layer_seq ; - if(this->n_layer == 0) - { throw std::invalid_argument("Error! No data layer given!") ; } - - // matrices dimensions - size_t n_row = 0 ; - size_t n_col = 0 ; - if(n_layer_read) - { n_row = read_matrices[0].size() ; - n_col = read_matrices[0][0].size() ; - } - else - { n_row = seq_matrices[0].size() ; - n_col = seq_matrices[0][0].size() ; - } - for(const auto& matrix : read_matrices) - { if(matrix.size() != n_row) - { char msg[4096] ; - sprintf(msg, "Error! A read layer row number is invalid " - "(found %zu, expected %zu)!", - matrix.size(), n_row) ; - throw std::invalid_argument(msg) ; - } - else if(matrix[0].size() != n_col) - { char msg[4096] ; - sprintf(msg, "Error! 
A read layer column number is invalid " - "(found %zu, expected %zu)!", - matrix.size(), n_col) ; - throw std::invalid_argument(msg) ; - } - } - for(const auto& matrix : seq_matrices) - { if(matrix.size() != n_row) - { char msg[4096] ; - sprintf(msg, "Error! A sequence layer row number is invalid " - "(found %zu, expected %zu)!", - matrix.size(), n_row) ; - throw std::invalid_argument(msg) ; - } - else if(matrix[0].size() != n_col) - { char msg[4096] ; - sprintf(msg, "Error! A sequence layes column number is invalid " - "(found %zu, expected %zu)!", - matrix.size(), n_col) ; - throw std::invalid_argument(msg) ; - } - } - this->n_row = n_row ; - this->n_col = n_col ; - - // class, shift, flip, iter - this->n_class = n_class ; - this->n_shift = n_shift ; - this->n_flip = flip+1 ; - this->flip = flip ; - this->n_iter = n_iter ; - - // model length - if(this->n_col < this->n_shift) - { char msg[4096] ; - sprintf(msg, "Error! Shift is bigger than data column number " - "(%zu / %zu)!", - this->n_shift, this->n_col) ; - throw std::invalid_argument(msg) ; - } - this->l_model = n_col - n_shift + 1 ; - - std::cerr << "EMEngine::EMEngine " << std::endl - << " n_row : " << this->n_row << std::endl - << " n_col : " << this->n_col << std::endl - << " n_class : " << this->n_class << std::endl - << " n_shift : " << this->n_shift << std::endl - << " n_flip : " << this->n_flip << std::endl - << " n_layer : " << this->n_layer << std::endl ; - - - // data structures - this->loglikelihood = - std::vector(this->n_layer, - matrix4d_d(this->n_row, - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - vector_d(this->n_flip, 0))))) ; - this->loglikelihood_max = matrix2d_d(this->n_layer, - vector_d(this->n_row, 0)) ; - this->loglikelihood_joint = - matrix4d_d(this->n_row, - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - vector_d(this->n_flip, 0)))) ; - std::cerr << "EMEngine::EMEngine loglikelihood _joint created " << this->n_row*this->n_class*this->n_shift*this->n_flip << 
std::endl ; - /* - this->post_prob = - matrix4d_d(this->n_row, - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - vector_d(this->n_flip, 0)))) ; - */ - std::vector tmp(this->n_row*this->n_class*this->n_shift*this->n_flip) ; - std::cerr << "EMEngine::EMEngine post_prob created " << this->n_row*this->n_class*this->n_shift*this->n_flip << std::endl ; - this->post_state_prob = - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - vector_d(this->n_flip, 0))) ; - std::cerr << "EMEngine::EMEngine post_state_prob created " << this->n_class*this->n_shift*this->n_flip << std::endl ; - this->post_class_prob = vector_d(this->n_class, 0) ; - std::cerr << "EMEngine::EMEngine post_class_prob created " << this->n_class << std::endl ; - this->post_prob_rowsum = vector_d(this->n_row, 0) ; - std::cerr << "EMEngine::EMEngine post_prob_rowsum created " << this->n_row << std::endl ; - this->post_prob_colsum = vector_d(this->n_class, 0) ; - std::cerr << "EMEngine::EMEngine post_prob_colsum created" << this->n_class << std::endl ; - this->post_prob_tot = 0 ; - - // set random number generator seed - getRandomGenerator(seed) ; - - // threads - if(n_threads) - { this->threads = new ThreadPool(n_threads) ; } - - if(seeding == EMEngine::RANDOM) - {} - - // initialise post prob randomly - this->set_post_prob_random() ; - std::cerr << "EMEngine::EMEngine post prob set" << std::endl ; - // create read layer and compute the models from the post prob - for(const auto& matrix : read_matrices) - { std::cerr << "EMEngine::EMEngine creating ReadLayer" << std::endl ; - // create the layer - this->read_layer_list.push_back(new ReadLayer(matrix, - this->n_class, - this->n_shift, - flip, - this->threads)) ; - this->read_layer_list.back()->update_model(this->post_prob, - this->threads) ; - } - // create read layer and compute the models from the post prob - for(const auto& matrix : seq_matrices) - { std::cerr << "EMEngine::EMEngine creating SeqLayer" << std::endl ; - // create the layer - 
this->sequence_layer_list.push_back(new SequenceLayer(matrix, - this->n_class, - this->n_shift, - flip)) ; - this->sequence_layer_list.back()->update_model(this->post_prob, - this->threads) ; - } - std::cerr << "EMEngine::EMEngine END" << std::endl ; - - /* - // create read layers with initialised models - for(const auto& matrix : read_matrices) - { // create the layer - this->read_layer_list.push_back(new ReadLayer(matrix, - this->n_class, - this->n_shift, - flip, - this->threads)) ; - // seed the models - if(seeding == EMEngine::RANDOM) - { this->read_layer_list.back()->seed_model_randomly() ; } - else if(seeding == EMEngine::SAMPLING) - { this->read_layer_list.back()->seed_model_sampling() ; } - else if(seeding == EMEngine::TOY) - { this->read_layer_list.back()->seed_model_toy() ; } - } - // create read layers with initialised models - for(const auto& matrix : seq_matrices) - { // create the layer - this->sequence_layer_list.push_back(new SequenceLayer(matrix, - this->n_class, - this->n_shift, - flip)) ; - // seed the models - if(seeding == EMEngine::RANDOM) - { this->sequence_layer_list.back()->seed_model_randomly() ; } - else if(seeding == EMEngine::SAMPLING) - { this->sequence_layer_list.back()->seed_model_sampling() ; } - else if(seeding == EMEngine::TOY) - { this->sequence_layer_list.back()->seed_model_toy() ; } - } - // set the class probabilities to a uniform distribution - this->set_state_prob_uniform() - */ -} - -EMEngine::~EMEngine() -{ // threads - if(this->threads != nullptr) - { this->threads->join() ; - delete this->threads ; - this->threads = nullptr ; - } - // read data and models - for(auto& ptr : this->read_layer_list) - { if(ptr != nullptr) - { delete ptr ; - ptr = nullptr ; - } - } - // sequence data and models - for(auto& ptr : this->sequence_layer_list) - { if(ptr != nullptr) - { delete ptr ; - ptr = nullptr ; - } - } -} - -std::vector EMEngine::get_read_models() const -{ std::vector models ; - for(const auto& ptr : this->read_layer_list) - 
{ models.push_back(ptr->get_model()) ; } - return models ; -} - -std::vector EMEngine::get_sequence_models() const -{ std::vector models ; - for(const auto& ptr : this->sequence_layer_list) - { models.push_back(ptr->get_model()) ; } - return models ; -} - -matrix4d_d EMEngine::get_post_prob() const -{ return this->post_prob ; } - -vector_d EMEngine::get_post_class_prob() const -{ return this->post_class_prob ; } - -EMEngine::exit_codes EMEngine::classify() -{ - size_t bar_update_n = this->n_iter ; - ConsoleProgressBar bar(std::cerr, bar_update_n, 60, "classifying") ; - - // optimize the partition - for(size_t n_iter=0; n_itern_iter; n_iter++) - { - // E-step - this->compute_loglikelihood() ; - this->compute_post_prob() ; - // M-step - this->compute_class_prob() ; - this->update_models() ; - this->center_post_state_prob() ; - - bar.update() ; - } - bar.update() ; std::cerr << std::endl ; - return EMEngine::exit_codes::SUCCESS ; -} - -void EMEngine::set_post_prob_random() -{ - this->post_prob_tot = 0. ; - this->post_prob_colsum = vector_d(this->n_class, 0.) 
; - - vector_d rowsums(this->n_row, 0) ; - - // random sampling - beta_distribution<> beta(1, this->n_row) ; - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_class; j++) - { for(size_t k=0; kn_shift; k++) - { for(size_t l=0; ln_flip; l++) - { double p = beta(getRandomGenerator()) ; - this->post_prob[i][j][k][l] = p ; - rowsums[i] += p ; - } - } - } - } - - // normalization - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_class; j++) - { for(size_t k=0; kn_shift; k++) - { for(size_t l=0; ln_flip; l++) - { double p = this->post_prob[i][j][k][l] / rowsums[i] ; - this->post_prob[i][j][k][l] = p ; - this->post_prob_tot += p ; - this->post_prob_colsum[j] += p ; - } - } - } - } - - // compute class and state probs - this->compute_class_prob() ; -} - -void EMEngine::set_state_prob_uniform() -{ double sum = this->n_class * this->n_shift * this->n_flip ; - for(size_t i=0; in_class; i++) - { for(size_t j=0; jn_shift; j++) - { for(size_t k=0; kn_flip; k++) - { this->post_state_prob[i][j][k] = 1./sum ; } - } - } -} - -void EMEngine::compute_loglikelihood() -{ // compute the loglikelihood for each layer - size_t i = 0 ; - for(auto& ptr : this->read_layer_list) - { ptr->compute_loglikelihoods(this->loglikelihood[i], - this->loglikelihood_max[i], - this->threads) ; - i++ ; - } - for(auto& ptr : this->sequence_layer_list) - { ptr->compute_loglikelihoods(this->loglikelihood[i], - this->loglikelihood_max[i], - this->threads) ; - i++ ; - } - // sum the likelihood for each state, over all layers - // this is the "joint likelihood" - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_class; j++) - { for(size_t k=0; kn_shift; k++) - { for(size_t l=0; ln_flip; l++) - { - // reset - this->loglikelihood_joint[i][j][k][l] = 0. ; - // sum - for(size_t m=0; mn_layer; m++) - { this->loglikelihood_joint[i][j][k][l] += - (this->loglikelihood[m][i][j][k][l] - - this->loglikelihood_max[m][i]) ; - } - /* - double ll_joint = 0. 
; - // sum - for(size_t m=0; mn_layer; m++) - { ll_joint += - (this->loglikelihood[m][i][j][k][l] - - this->loglikelihood_max[m][i]) ; - } - this->loglikelihood_joint[i][j][k][l] = std::max(ll_joint, SequenceLayer::p_min_log) ; - */ - } - } - } - } -} - -void EMEngine::compute_post_prob() -{ // don't parallelize - if(this->threads == nullptr) - { std::promise promise ; - std::future future = promise.get_future() ; - this->compute_post_prob_routine(0, this->n_row, promise) ; - // compute the sum of post prob and the per class sum of post prob - // from the partial results computed on each slice - this->post_prob_tot = 0. ; - this->post_prob_colsum = future.get() ; - for(const auto& prob : this->post_prob_colsum) - { this->post_prob_tot += prob ; } - } - // parallelize - else - { size_t n_threads = this->threads->getNThread() ; - - // compute the slices on which each thread will work - std::vector> slices = - ThreadPool::split_range(0, this->n_row,n_threads) ; - - // get promises and futures - // the function run by the threads will compute - // the partial sum per class of post_prob for the given slice - // this should be used to compute the complete sum of post_prob - // and the complete sum per class of post_prob - std::vector> promises(n_threads) ; - std::vector> futures(n_threads) ; - for(size_t i=0; ithreads->addJob(std::move( - std::bind(&EMEngine::compute_post_prob_routine, - this, - slice.first, - slice.second, - std::ref(promises[i])))) ; - } - // wait until all threads are done working - // compute the sum of post prob and the per class sum of post prob - // from the partial results computed on each slice - this->post_prob_tot = 0. ; - this->post_prob_colsum = vector_d(this->n_class, 0.) 
; - for(auto& future : futures) - { auto probs = future.get() ; - for(size_t i=0; in_class; i++) - { double prob = probs[i] ; - this->post_prob_colsum[i] += prob ; - this->post_prob_tot += prob ; - } - } - // -------------------------- threads stop --------------------------- - } -} - - -void EMEngine::compute_post_prob_routine(size_t from, - size_t to, - std::promise& post_prob_colsum) -{ vector_d colsums(this->n_class, 0.) ; - - // reset grand total - // this->post_prob_tot = 0 ; - // this->post_prob_colsum = vector_d(n_class, 0) ; - - // post prob - for(size_t i=from; ipost_prob_rowsum[i] = 0. ; - for(size_t n_class=0; n_classn_class; n_class++) - { for(size_t n_shift=0; n_shiftn_shift; n_shift++) - { for(size_t n_flip=0; n_flipn_flip; n_flip++) - { /* - double p = std::max(exp(this->loglikelihood_joint[i][n_class][n_shift][n_flip]) * - this->post_state_prob[n_class][n_shift][n_flip], - DataLayer::p_min) ; - */ - double p = exp(this->loglikelihood_joint[i][n_class][n_shift][n_flip]) * - this->post_state_prob[n_class][n_shift][n_flip] ; - this->post_prob[i][n_class][n_shift][n_flip] = p ; - this->post_prob_rowsum[i] += p ; - } - } - } - // normalize - for(size_t n_class=0; n_classn_class; n_class++) - { for(size_t n_shift=0; n_shiftn_shift; n_shift++) - { for(size_t n_flip=0; n_flipn_flip; n_flip++) - { /* - this->post_prob[i][n_class][n_shift][n_flip] /= - this->post_prob_rowsum[i] ; - double p = this->post_prob[i][n_class][n_shift][n_flip] ; - colsums[n_class] += p ; - */ - double p = std::max(this->post_prob[i][n_class][n_shift][n_flip] / - this->post_prob_rowsum[i], - ReadLayer::p_min) ; - this->post_prob[i][n_class][n_shift][n_flip] = p ; - colsums[n_class] += p ; - // this->post_prob_colsum[n_class] += p ; - // this->post_prob_tot += p ; - } - } - } - } - post_prob_colsum.set_value(colsums) ; -} - -void EMEngine::compute_class_prob() -{ - for(size_t n_class=0; n_classn_class; n_class++) - { // reset total - this->post_class_prob[n_class] = 0. 
; - for(size_t n_shift=0; n_shiftn_shift; n_shift++) - { for(size_t flip=0; flipn_flip; flip++) - { // sum - this->post_state_prob[n_class][n_shift][flip] = 0. ; - for(size_t i=0; in_row; i++) - { this->post_state_prob[n_class][n_shift][flip] += - this->post_prob[i][n_class][n_shift][flip] ; - } - // normalize - this->post_state_prob[n_class][n_shift][flip] /= this->post_prob_tot ; - this->post_class_prob[n_class] += this->post_state_prob[n_class][n_shift][flip] ; - } - } - } -} - -void EMEngine::update_models() -{ // read data and models - for(auto& ptr : this->read_layer_list) - { ptr->update_model(this->post_prob, - this->post_prob_colsum, - this->threads) ; - } - // sequence data and models - for(auto& ptr : this->sequence_layer_list) - { ptr->update_model(this->post_prob, - this->threads) ; - } -} - -void EMEngine::center_post_state_prob() -{ - if(this->n_shift == 1) - { return ; } - - // the possible shift states - vector_d shifts(this->n_shift) ; - std::iota(shifts.begin(), shifts.end(), 1.) ; - - // the shift probabilities and the class probabilies - // (no need to norm., class_prob sums to 1) - double shifts_prob_measured_tot = 0. ; - std::vector shifts_prob_measured(this->n_shift) ; - for(size_t s=0; sn_shift; s++) - { for(size_t k=0; kn_class; k++) - { for(size_t f=0; fn_flip; f++) - { shifts_prob_measured[s] += this->post_state_prob[k][s][f] ; - shifts_prob_measured_tot += this->post_state_prob[k][s][f] ; - } - } - } - - - // the shift mean and (biased) standard deviation - double shifts_sd = sd(shifts, shifts_prob_measured, false) ; - - // the shift probabilities under the assumption that is - // distributed as a gaussian centered on - // the central shift state with sd and mean as in the data - // sd as the data - vector_d shifts_prob_centered(shifts.size(), 0.) ; - double shifts_prob_centered_tot = 0. 
; - for(size_t i=0; in_shift/2)+1, shifts_sd) ; - shifts_prob_centered_tot += shifts_prob_centered[i] ; - } - - for(size_t k=0; kn_class; k++) - { for(size_t f=0; fn_flip; f++) - { for(size_t s=0; sn_shift; s++) - { this->post_state_prob[k][s][f] = this->post_class_prob[k] * - shifts_prob_centered[s] / - (this->n_flip * shifts_prob_centered_tot) ; - } - } - } - - // shifts_prob_measured_tot = 0. ; - shifts_prob_measured.clear() ; - shifts_prob_measured.resize(this->n_shift) ; - for(size_t s=0; sn_shift; s++) - { for(size_t k=0; kn_class; k++) - { for(size_t f=0; fn_flip; f++) - { shifts_prob_measured[s] += - this->post_state_prob[k][s][f] ; - } - } - } -} diff --git a/src/Clustering/EMEngine.cpp.save b/src/Clustering/EMEngine.cpp.save deleted file mode 100644 index 5ff245c..0000000 --- a/src/Clustering/EMEngine.cpp.save +++ /dev/null @@ -1,517 +0,0 @@ -#include -#include // log(), exp(), pow() -#include // std::promise, std::future -#include // std::pair, std::move() -#include // std::bind(), std::ref() - -#include // rand_int_uniform() -#include // getRandomNumberGenerator() -#include // poisson_pmf(), normal_pmf(), sd() -#include // ConsoleProgressBar -#include - - -EMEngine::EMEngine(const std::vector& read_matrices, - const std::vector& seq_matrices, - size_t n_class, - size_t n_iter, - size_t n_shift, - bool flip, - EMEngine::seeding_codes seeding, - const std::string& seed, - size_t n_threads) - : read_layer_list(), - sequence_layer_list(), - threads(nullptr) - -{ // nb of layers - size_t n_layer_read = read_matrices.size() ; - size_t n_layer_seq = seq_matrices.size() ; - this->n_layer = n_layer_read + n_layer_seq ; - if(this->n_layer == 0) - { throw std::invalid_argument("Error! 
No data layer given!") ; } - - // matrices dimensions - size_t n_row = 0 ; - size_t n_col = 0 ; - if(n_layer_read) - { n_row = read_matrices[0].size() ; - n_col = read_matrices[0][0].size() ; - } - else - { n_row = seq_matrices[0].size() ; - n_col = seq_matrices[0][0].size() ; - } - for(const auto& matrix : read_matrices) - { if(matrix.size() != n_row) - { char msg[4096] ; - sprintf(msg, "Error! A read layer row number is invalid " - "(found %zu, expected %zu)!", - matrix.size(), n_row) ; - throw std::invalid_argument(msg) ; - } - else if(matrix[0].size() != n_col) - { char msg[4096] ; - sprintf(msg, "Error! A read layer column number is invalid " - "(found %zu, expected %zu)!", - matrix.size(), n_col) ; - throw std::invalid_argument(msg) ; - } - } - for(const auto& matrix : seq_matrices) - { if(matrix.size() != n_row) - { char msg[4096] ; - sprintf(msg, "Error! A sequence layer row number is invalid " - "(found %zu, expected %zu)!", - matrix.size(), n_row) ; - throw std::invalid_argument(msg) ; - } - else if(matrix[0].size() != n_col) - { char msg[4096] ; - sprintf(msg, "Error! A sequence layes column number is invalid " - "(found %zu, expected %zu)!", - matrix.size(), n_col) ; - throw std::invalid_argument(msg) ; - } - } - this->n_row = n_row ; - this->n_col = n_col ; - - // class, shift, flip, iter - this->n_class = n_class ; - this->n_shift = n_shift ; - this->n_flip = flip+1 ; - this->flip = flip ; - this->n_iter = n_iter ; - - // model length - if(this->n_col < this->n_shift) - { char msg[4096] ; - sprintf(msg, "Error! 
Shift is bigger than data column number " - "(%zu / %zu)!", - this->n_shift, this->n_col) ; - throw std::invalid_argument(msg) ; - } - this->l_model = n_col - n_shift + 1 ; - - // data structures - this->loglikelihood = - std::vector(this->n_layer, - matrix4d_d(n_row, - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - vector_d(this->n_flip, 0))))) ; - this->loglikelihood_max = matrix2d_d(this->n_layer, - vector_d(this->n_row, 0)) ; - this->loglikelihood_joint = - matrix4d_d(this->n_row, - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - vector_d(this->n_flip, 0)))) ; - this->post_prob = - matrix4d_d(this->n_row, - matrix3d_d(this->n_class, - matrix2d_d(this->n_shift, - vector_d(this->n_flip, 0)))) ; - this->post_state_prob = - matrix3d_d(n_class, - matrix2d_d(this->n_shift, - vector_d(this->n_flip, 0))) ; - this->post_class_prob = vector_d(n_class, 0) ; - this->post_prob_rowsum = vector_d(n_row, 0) ; - this->post_prob_colsum = vector_d(n_class, 0) ; - this->post_prob_tot = 0 ; - - // set random number generator seed - getRandomGenerator(seed) ; - - // threads - if(n_threads) - { this->threads = new ThreadPool(n_threads) ; } - - // create read layers with initialised models - for(const auto& matrix : read_matrices) - { // create the layer - this->read_layer_list.push_back(new ReadLayer(matrix, - this->n_class, - this->n_shift, - flip, - this->threads)) ; - // seed the models - if(seeding == EMEngine::RANDOM) - { this->read_layer_list.back()->seed_model_randomly() ; } - else if(seeding == EMEngine::SAMPLING) - { this->read_layer_list.back()->seed_model_sampling() ; } - else if(seeding == EMEngine::TOY) - { this->read_layer_list.back()->seed_model_toy() ; } - } - // create read layers with initialised models - for(const auto& matrix : seq_matrices) - { // create the layer - this->sequence_layer_list.push_back(new SequenceLayer(matrix, - this->n_class, - this->n_shift, - flip)) ; - // seed the models - if(seeding == EMEngine::RANDOM) - { 
this->sequence_layer_list.back()->seed_model_randomly() ; } - else if(seeding == EMEngine::SAMPLING) - { this->sequence_layer_list.back()->seed_model_sampling() ; } - else if(seeding == EMEngine::TOY) - { this->sequence_layer_list.back()->seed_model_toy() ; } - } - // set the class probabilities to a uniform distribution - this->set_state_prob_uniform() ; -} - -EMEngine::~EMEngine() -{ // threads - if(this->threads != nullptr) - { this->threads->join() ; - delete this->threads ; - this->threads = nullptr ; - } - // read data and models - for(auto& ptr : this->read_layer_list) - { if(ptr != nullptr) - { delete ptr ; - ptr = nullptr ; - } - } - // sequence data and models - for(auto& ptr : this->sequence_layer_list) - { if(ptr != nullptr) - { delete ptr ; - ptr = nullptr ; - } - } -} - -std::vector EMEngine::get_read_models() const -{ std::vector models ; - for(const auto& ptr : this->read_layer_list) - { models.push_back(ptr->get_model()) ; } - return models ; -} - -std::vector EMEngine::get_sequence_models() const -{ std::vector models ; - for(const auto& ptr : this->sequence_layer_list) - { models.push_back(ptr->get_model()) ; } - return models ; -} - -matrix4d_d EMEngine::get_post_prob() const -{ return this->post_prob ; } - -vector_d EMEngine::get_post_class_prob() const -{ return this->post_class_prob ; } - -EMEngine::exit_codes EMEngine::classify() -{ - size_t bar_update_n = this->n_iter ; - ConsoleProgressBar bar(std::cerr, bar_update_n, 60, "classifying") ; - - std::cerr << "EM new" << std::endl ; - std::cerr << "log likelihood joint" << std::endl ; - std::cerr << this->loglikelihood_joint << std::endl << std::endl ; - std::cerr << "post prob" << std::endl ; - std::cerr << this->post_prob << std::endl << std::endl ; - std::cerr << "post state prob" << std::endl ; - std::cerr << this->post_state_prob << std::endl << std::endl ; - std::cerr << "post class prob" << std::endl ; - std::cerr << this->post_class_prob << std::endl << std::endl ; - std::cerr << 
"model" << std::endl ; - std::cerr << this->sequence_layer_list.front()->get_model()[0] << std::endl << std::endl ; - std::cerr << "--------------------------------------------" << std::endl << std::endl ; - - // optimize the partition - for(size_t n_iter=0; n_itern_iter; n_iter++) - { - std::cerr << "model" << std::endl ; - std::cerr << this->sequence_layer_list.front()->get_model()[0] << std::endl << std::endl ; - - // E-step - this->compute_loglikelihood() ; - this->compute_post_prob() ; - // M-step - this->compute_class_prob() ; - this->update_models() ; - this->center_post_state_prob() ; - - std::cerr << "EM new" << std::endl ; - std::cerr << "log likelihood" << std::endl ; - std::cerr << this->loglikelihood[0] << std::endl << std::endl ; - std::cerr << "log likelihood max" << std::endl ; - std::cerr << this->loglikelihood_max[0] << std::endl << std::endl ; - std::cerr << "log likelihood joint" << std::endl ; - std::cerr << this->loglikelihood_joint << std::endl << std::endl ; - std::cerr << "post prob" << std::endl ; - std::cerr << this->post_prob << std::endl << std::endl ; - std::cerr << "post state prob" << std::endl ; - std::cerr << this->post_state_prob << std::endl << std::endl ; - std::cerr << "post class prob" << std::endl ; - std::cerr << this->post_class_prob << std::endl << std::endl ; - std::cerr << "model" << std::endl ; - std::cerr << this->sequence_layer_list.front()->get_model()[0] << std::endl << std::endl ; - std::cerr << "--------------------------------------------" << std::endl << std::endl ; - - - bar.update() ; - } - bar.update() ; std::cerr << std::endl ; - return EMEngine::exit_codes::SUCCESS ; -} - -void EMEngine::set_state_prob_uniform() -{ double sum = this->n_class * this->n_shift * this->n_flip ; - for(size_t i=0; in_class; i++) - { for(size_t j=0; jn_shift; j++) - { for(size_t k=0; kn_flip; k++) - { this->post_state_prob[i][j][k] = 1./sum ; } - } - } -} - -void EMEngine::compute_loglikelihood() -{ // compute the loglikelihood 
for each layer - size_t i = 0 ; - for(auto& ptr : this->read_layer_list) - { ptr->compute_loglikelihoods(this->loglikelihood[i], - this->loglikelihood_max[i], - this->threads) ; - i++ ; - } - for(auto& ptr : this->sequence_layer_list) - { ptr->compute_loglikelihoods(this->loglikelihood[i], - this->loglikelihood_max[i], - this->threads) ; - i++ ; - } - // sum the likelihood for each state, over all layers - // this is the "joint likelihood" - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_class; j++) - { for(size_t k=0; kn_shift; k++) - { for(size_t l=0; ln_flip; l++) - { // reset - this->loglikelihood_joint[i][j][k][l] = 0. ; - std::cerr << "loglikelihood_joint[" << i << "][" - << j << "][" - << k << "][" - << l << " = " ; - // sum - for(size_t m=0; mn_layer; m++) - { this->loglikelihood_joint[i][j][k][l] += - (this->loglikelihood[m][i][j][k][l] - - this->loglikelihood_max[m][i]) ; - std::cerr << this->loglikelihood[m][i][j][k][l] - << "-" - << this->loglikelihood_max[m][i] - << " " ; - } - std::cerr << std::endl ; - } - } - } - } -} - -void EMEngine::compute_post_prob() -{ // don't parallelize - if(this->threads == nullptr) - { std::promise promise ; - std::future future = promise.get_future() ; - this->compute_post_prob_routine(0, this->n_row, promise) ; - // compute the sum of post prob and the per class sum of post prob - // from the partial results computed on each slice - this->post_prob_tot = 0. 
; - this->post_prob_colsum = future.get() ; - for(const auto& prob : this->post_prob_colsum) - { this->post_prob_tot += prob ; } - } - // parallelize - else - { size_t n_threads = this->threads->getNThread() ; - - // compute the slices on which each thread will work - std::vector> slices = - ThreadPool::split_range(0, this->n_row,n_threads) ; - - // get promises and futures - // the function run by the threads will compute - // the partial sum per class of post_prob for the given slice - // this should be used to compute the complete sum of post_prob - // and the complete sum per class of post_prob - std::vector> promises(n_threads) ; - std::vector> futures(n_threads) ; - for(size_t i=0; ithreads->addJob(std::move( - std::bind(&EMEngine::compute_post_prob_routine, - this, - slice.first, - slice.second, - std::ref(promises[i])))) ; - } - // wait until all threads are done working - // compute the sum of post prob and the per class sum of post prob - // from the partial results computed on each slice - this->post_prob_tot = 0. ; - this->post_prob_colsum = vector_d(this->n_class, 0.) ; - for(auto& future : futures) - { auto probs = future.get() ; - for(size_t i=0; in_class; i++) - { double prob = probs[i] ; - this->post_prob_colsum[i] += prob ; - this->post_prob_tot += prob ; - } - } - // -------------------------- threads stop --------------------------- - } -} - - -void EMEngine::compute_post_prob_routine(size_t from, - size_t to, - std::promise& post_prob_colsum) -{ vector_d colsums(this->n_class, 0.) ; - - // reset grand total - // this->post_prob_tot = 0 ; - // this->post_prob_colsum = vector_d(n_class, 0) ; - - // post prob - for(size_t i=from; ipost_prob_rowsum[i] = 0. 
; - - for(size_t n_class=0; n_classn_class; n_class++) - { for(size_t n_shift=0; n_shiftn_shift; n_shift++) - { for(size_t n_flip=0; n_flipn_flip; n_flip++) - { - double p = std::max(exp(this->loglikelihood_joint[i][n_class][n_shift][n_flip]) * - this->post_state_prob[n_class][n_shift][n_flip], - DataLayer::p_min) ; - this->post_prob[i][n_class][n_shift][n_flip] = p ; - this->post_prob_rowsum[i] += p ; - } - } - } - // normalize - for(size_t n_class=0; n_classn_class; n_class++) - { for(size_t n_shift=0; n_shiftn_shift; n_shift++) - { for(size_t n_flip=0; n_flipn_flip; n_flip++) - { this->post_prob[i][n_class][n_shift][n_flip] /= - this->post_prob_rowsum[i] ; - double p = this->post_prob[i][n_class][n_shift][n_flip] ; - colsums[n_class] += p ; - // this->post_prob_colsum[n_class] += p ; - // this->post_prob_tot += p ; - } - } - } - } - post_prob_colsum.set_value(colsums) ; -} - -void EMEngine::compute_class_prob() -{ - for(size_t n_class=0; n_classn_class; n_class++) - { // reset total - this->post_class_prob[n_class] = 0. ; - for(size_t n_shift=0; n_shiftn_shift; n_shift++) - { for(size_t flip=0; flipn_flip; flip++) - { // sum - this->post_state_prob[n_class][n_shift][flip] = 0. 
; - for(size_t i=0; in_row; i++) - { this->post_state_prob[n_class][n_shift][flip] += - this->post_prob[i][n_class][n_shift][flip] ; - } - // normalize - this->post_state_prob[n_class][n_shift][flip] /= this->post_prob_tot ; - this->post_class_prob[n_class] += this->post_state_prob[n_class][n_shift][flip] ; - } - } - } -} - -void EMEngine::update_models() -{ // read data and models - for(auto& ptr : this->read_layer_list) - { ptr->update_model(this->post_prob, - this->post_prob_colsum, - this->threads) ; - } - // sequence data and models - for(auto& ptr : this->sequence_layer_list) - { ptr->update_model(this->post_prob, - this->threads) ; - } -} - -void EMEngine::center_post_state_prob() -{ - if(this->n_shift == 1) - { return ; } - - // the possible shift states - vector_d shifts(this->n_shift) ; - std::iota(shifts.begin(), shifts.end(), 1.) ; - - // the shift probabilities and the class probabilies - // (no need to norm., class_prob sums to 1) - double shifts_prob_measured_tot = 0. ; - std::vector shifts_prob_measured(this->n_shift) ; - for(size_t s=0; sn_shift; s++) - { for(size_t k=0; kn_class; k++) - { for(size_t f=0; fn_flip; f++) - { shifts_prob_measured[s] += this->post_state_prob[k][s][f] ; - shifts_prob_measured_tot += this->post_state_prob[k][s][f] ; - } - } - } - - - // the shift mean and (biased) standard deviation - double shifts_sd = sd(shifts, shifts_prob_measured, false) ; - - // the shift probabilities under the assumption that is - // distributed as a gaussian centered on - // the central shift state with sd and mean as in the data - // sd as the data - vector_d shifts_prob_centered(shifts.size(), 0.) ; - double shifts_prob_centered_tot = 0. 
; - for(size_t i=0; in_shift/2)+1, shifts_sd) ; - shifts_prob_centered_tot += shifts_prob_centered[i] ; - } - - for(size_t k=0; kn_class; k++) - { for(size_t f=0; fn_flip; f++) - { for(size_t s=0; sn_shift; s++) - { this->post_state_prob[k][s][f] = this->post_class_prob[k] * - shifts_prob_centered[s] / - (this->n_flip * shifts_prob_centered_tot) ; - } - } - } - - // shifts_prob_measured_tot = 0. ; - shifts_prob_measured.clear() ; - shifts_prob_measured.resize(this->n_shift) ; - for(size_t s=0; sn_shift; s++) - { for(size_t k=0; kn_class; k++) - { for(size_t f=0; fn_flip; f++) - { shifts_prob_measured[s] += - this->post_state_prob[k][s][f] ; - } - } - } -} diff --git a/src/Clustering/EMEngine.hpp b/src/Clustering/EMEngine.hpp deleted file mode 100644 index 1b15e16..0000000 --- a/src/Clustering/EMEngine.hpp +++ /dev/null @@ -1,307 +0,0 @@ -#ifndef EMENGINE_HPP -#define EMENGINE_HPP - -#include -#include -#include -#include -#include // std::promise - -#include -#include -#include -#include -#include - - -/*! - * \brief This class implements the iterative an expectation - * maximization classification procedure to discover - * patterns in ChIP-seq (and related data) data, as described - * in Nair et al. 2014, Bioinformatics. - * However, the classification procedure has been generalized - * such that genomic regions can be partitioned according - * to several different signal at the same time, instead - * of just one as in the original paper. Additionally, it - * is possible to include the underlying DNA sequence such - * that the partitioning procedure will find i) ChIP-seq - * data signal patterns and ii) DNA sequence motifs at - * the same time. 
- * To mitigate a miss-alignment of the signal/sequences in - * the different regions - that is a same signal strech/sequence - * motif is present in two regions but at different offsets - - * the classification procedure can search protypic signals - * shorter than a whole region, at each possible offset over the - * region (named shift states). - * To mitigate an inversion of the signal/sequence in the different - * regions - that is a same signal strech/sequence motif is present - * in two regions but in reverse orientation - the classification - * procedure can search protypic signals in both orientation. - */ -class EMEngine -{ - public: - /*! - * \brief The possible seeding strategies. - */ - enum seeding_codes {RANDOM=0, SAMPLING, TOY} ; - - /*! - * \brief The possible exit codes for the cluster method. - * 0 the clustering procedure converged, 1 the clustering - * procedure succeeded without converging, 2 the clustering - * failed. - */ - enum exit_codes {CONVERGENCE=0, SUCCESS, FAILURE} ; - - public: - /*! - * \brief Constructs an object to partition the - * region according to all the givend data layers - * with the given shifting and flipping freedom. - * \param read_matrices a vector containing all - * the different different data densities (ChIP-seq - * or related signal) for the regions of interest. - * \param seq_matrices a vector containing the DNA - * sequences for the regions of interest. - * \param n_class the number of region classes - * to search. - * \param n_iter the number of optimization iterations. - * \param n_shift the number of shift states allowed. - * \param flip whether flipping is allowed. - * \param seeding how to initialise the signal/sequence - * models. - * \param seed a seed to initialise the random number - * generator. - * \param n_threads the number of parallel threads - * to run the computations. 0 means no parallel - * computing, everything is run on the main thread. 
- */ - EMEngine(const std::vector& read_matrices, - const std::vector& seq_matrices, - size_t n_class, - size_t n_iter, - size_t n_shift, - bool flip, - EMEngine::seeding_codes seeding, - const std::string& seed="", - size_t n_threads=0) ; - /*! - * Destructor. - */ - ~EMEngine() ; - - /*! - * \brief Returns all read models. - * The models are in the same order - * as the data were given to the - * constructor. - * \return a vector containing the - * models. - */ - std::vector get_read_models() const ; - - /*! - * \brief Returns all sequence models. - * The models are in the same order - * as the data were given to the - * constructor. - * \return a vector containing the - * models. - */ - std::vector get_sequence_models() const ; - - /*! - * \brief Returns the posterior probability - * of each point belonging to each class, for - * each possible shift and flip state. - * \return the posterior probability matrix, - * with the following dimensions : - * 1st dim : the data points - * 2nd dim : the classes - * 3rd dim : the shift states - * 4th dim : the flip states - */ - matrix4d_d get_post_prob() const ; - - /*! - * \brief Returns the posterior class - * probabilities (the total class - * probability over all shift and - * flip states). - * \return the posterior class - * probabilities. - */ - vector_d get_post_class_prob() const ; - - /*! - * \brief Runs the models optimization and the - * data classification. - * \return a code indicating how the optimization - * ended. - */ - EMEngine::exit_codes classify() ; - - protected: - - /*! - * \brief Sets the posterior - * probabilities randomly (by - * sampling them from a beta - * distribution) and update all - * other probabilities accordingly.. - */ - void set_post_prob_random() ; - - /*! - * \brief Sets all the state probabilities - * (all shift and flip states in all classes) - * to a uniform probability. - */ - void set_state_prob_uniform() ; - - /*! 
- * \brief Computes the data log likelihood given the - * current models, for all layers and the joint - * likelihood for each state (the sum of the layer - * likelihoods for all layers, for a given state). - */ - void compute_loglikelihood() ; - - /*! - * \brief Computes the data posterior probabilties. - */ - void compute_post_prob() ; - - /*! - * \brief The routine that effectively computes - * the posterior probabilties. - * \param from the index of the first row - * in the data to consider. - * \param to the index of the past last row - * in the data to consider. - * \param done the partial column (over the classes) - * sum of posterior probabilities. If several routines - * are running together, the colsums are retrieved by - * summing up the vectors together. - */ - void compute_post_prob_routine(size_t from, - size_t to, - std::promise& post_prob_colsum) ; - - /*! - * \brief Computes the class/state probabilities from the - * posterior probabilities. - */ - void compute_class_prob() ; - - /*! - * \brief Update the data models for all layers, given - * the current posterior and class probabilities. - */ - void update_models() ; - - /*! - * \brief Modifies the state probabilities in such a - * way that the state probabilities are then normaly - * distributed, centered on the middle shift state. - * However, the overall class probabilities remain - * unchanged. - */ - void center_post_state_prob() ; - - /*! - * \brief the number of data layers. - */ - size_t n_layer ; - /*! - * \brief the number of rows in data. - */ - size_t n_row ; - /*! - * \brief the number of columns in data. - */ - size_t n_col ; - /*! - * \brief the number of classes. - */ - size_t n_class ; - /*! - * \brief the number of shift states. - */ - size_t n_shift ; - /*! - * \brief zhe number of flip states. - */ - size_t n_flip ; - /*! - * \brief the number of iterations. - */ - size_t n_iter ; - /*! - * \brief whther flip is allowed. - */ - bool flip ; - /*! 
- * \brief the length of the models. - */ - size_t l_model ; - - /*! - * \brief the log likelihoods. - * One per data layer. - */ - std::vector loglikelihood ; - /*! - * \brief the max log likelihood value for each row. - * One per data layer. - */ - std::vector loglikelihood_max ; - /*! - * \brief the joint loglikelihood, through all - * layers, for each state (each class for each - * shift and flip state). - */ - matrix4d_d loglikelihood_joint ; - /*! - * \brief the posterior probabilities. - */ - matrix4d_d post_prob ; - /*! - * \brief the states (shift and flip in each class) - * probabilities. - */ - matrix3d_d post_state_prob ; - /*! - * \brief the total prob per class. - */ - vector_d post_class_prob ; - /*! - * \brief the sum per row (data point) of post_prob. - */ - vector_d post_prob_rowsum ; - /*! - * \brief the sum per column (class) of post_prob. - */ - vector_d post_prob_colsum ; - /*! - * \brief the total of post_prob. - */ - double post_prob_tot ; - - /*! - * \brief the read data and their models. - */ - std::list read_layer_list ; - /*! - * \brief the sequence data and their models. - */ - std::list sequence_layer_list ; - /*! - * \brief the threads. - */ - ThreadPool* threads ; - -} ; - -#endif // EMENGINE_HPP diff --git a/src/Clustering/EMEngine.hpp.save b/src/Clustering/EMEngine.hpp.save deleted file mode 100644 index c3206b1..0000000 --- a/src/Clustering/EMEngine.hpp.save +++ /dev/null @@ -1,298 +0,0 @@ -#ifndef EMENGINE_HPP -#define EMENGINE_HPP - -#include -#include -#include -#include -#include // std::promise - -#include -#include -#include -#include -#include - - -/*! - * \brief This class implements the iterative an expectation - * maximization classification procedure to discover - * patterns in ChIP-seq (and related data) data, as described - * in Nair et al. 2014, Bioinformatics. 
- * However, the classification procedure has been generalized - * such that genomic regions can be partitioned according - * to several different signal at the same time, instead - * of just one as in the original paper. Additionally, it - * is possible to include the underlying DNA sequence such - * that the partitioning procedure will find i) ChIP-seq - * data signal patterns and ii) DNA sequence motifs at - * the same time. - * To mitigate a miss-alignment of the signal/sequences in - * the different regions - that is a same signal strech/sequence - * motif is present in two regions but at different offsets - - * the classification procedure can search protypic signals - * shorter than a whole region, at each possible offset over the - * region (named shift states). - * To mitigate an inversion of the signal/sequence in the different - * regions - that is a same signal strech/sequence motif is present - * in two regions but in reverse orientation - the classification - * procedure can search protypic signals in both orientation. - */ -class EMEngine -{ - public: - /*! - * \brief The possible seeding strategies. - */ - enum seeding_codes {RANDOM=0, SAMPLING, TOY} ; - - /*! - * \brief The possible exit codes for the cluster method. - * 0 the clustering procedure converged, 1 the clustering - * procedure succeeded without converging, 2 the clustering - * failed. - */ - enum exit_codes {CONVERGENCE=0, SUCCESS, FAILURE} ; - - public: - /*! - * \brief Constructs an object to partition the - * region according to all the givend data layers - * with the given shifting and flipping freedom. - * \param read_matrices a vector containing all - * the different different data densities (ChIP-seq - * or related signal) for the regions of interest. - * \param seq_matrices a vector containing the DNA - * sequences for the regions of interest. - * \param n_class the number of region classes - * to search. - * \param n_iter the number of optimization iterations. 
- * \param n_shift the number of shift states allowed. - * \param flip whether flipping is allowed. - * \param seeding how to initialise the signal/sequence - * models. - * \param seed a seed to initialise the random number - * generator. - * \param n_threads the number of parallel threads - * to run the computations. 0 means no parallel - * computing, everything is run on the main thread. - */ - EMEngine(const std::vector& read_matrices, - const std::vector& seq_matrices, - size_t n_class, - size_t n_iter, - size_t n_shift, - bool flip, - EMEngine::seeding_codes seeding, - const std::string& seed="", - size_t n_threads=0) ; - /*! - * Destructor. - */ - ~EMEngine() ; - - /*! - * \brief Returns all read models. - * The models are in the same order - * as the data were given to the - * constructor. - * \return a vector containing the - * models. - */ - std::vector get_read_models() const ; - - /*! - * \brief Returns all sequence models. - * The models are in the same order - * as the data were given to the - * constructor. - * \return a vector containing the - * models. - */ - std::vector get_sequence_models() const ; - - /*! - * \brief Returns the posterior probability - * of each point belonging to each class, for - * each possible shift and flip state. - * \return the posterior probability matrix, - * with the following dimensions : - * 1st dim : the data points - * 2nd dim : the classes - * 3rd dim : the shift states - * 4th dim : the flip states - */ - matrix4d_d get_post_prob() const ; - - /*! - * \brief Returns the posterior class - * probabilities (the total class - * probability over all shift and - * flip states). - * \return the posterior class - * probabilities. - */ - vector_d get_post_class_prob() const ; - - /*! - * \brief Runs the models optimization and the - * data classification. - * \return a code indicating how the optimization - * ended. - */ - EMEngine::exit_codes classify() ; - - protected: - - /*! 
- * \brief Sets all the state probabilities - * (all shift and flip states in all classes) - * to a uniform probability. - */ - void set_state_prob_uniform() ; - - /*! - * \brief Computes the data log likelihood given the - * current models, for all layers and the joint - * likelihood for each state (the sum of the layer - * likelihoods for all layers, for a given state). - */ - void compute_loglikelihood() ; - - /*! - * \brief Computes the data posterior probabilties. - */ - void compute_post_prob() ; - - /*! - * \brief The routine that effectively computes - * the posterior probabilties. - * \param from the index of the first row - * in the data to consider. - * \param to the index of the past last row - * in the data to consider. - * \param done the partial column (over the classes) - * sum of posterior probabilities. If several routines - * are running together, the colsums are retrieved by - * summing up the vectors together. - */ - void compute_post_prob_routine(size_t from, - size_t to, - std::promise& post_prob_colsum) ; - - /*! - * \brief Computes the class/state probabilities from the - * posterior probabilities. - */ - void compute_class_prob() ; - - /*! - * \brief Update the data models for all layers, given - * the current posterior and class probabilities. - */ - void update_models() ; - - /*! - * \brief Modifies the state probabilities in such a - * way that the state probabilities are then normaly - * distributed, centered on the middle shift state. - * However, the overall class probabilities remain - * unchanged. - */ - void center_post_state_prob() ; - - /*! - * \brief the number of data layers. - */ - size_t n_layer ; - /*! - * \brief the number of rows in data. - */ - size_t n_row ; - /*! - * \brief the number of columns in data. - */ - size_t n_col ; - /*! - * \brief the number of classes. - */ - size_t n_class ; - /*! - * \brief the number of shift states. - */ - size_t n_shift ; - /*! - * \brief zhe number of flip states. 
- */ - size_t n_flip ; - /*! - * \brief the number of iterations. - */ - size_t n_iter ; - /*! - * \brief whther flip is allowed. - */ - bool flip ; - /*! - * \brief the length of the models. - */ - size_t l_model ; - - /*! - * \brief the log likelihoods. - * One per data layer. - */ - std::vector loglikelihood ; - /*! - * \brief the max log likelihood value for each row. - * One per data layer. - */ - std::vector loglikelihood_max ; - /*! - * \brief the joint loglikelihood, through all - * layers, for each state (each class for each - * shift and flip state). - */ - matrix4d_d loglikelihood_joint ; - /*! - * \brief the posterior probabilities. - */ - matrix4d_d post_prob ; - /*! - * \brief the states (shift and flip in each class) - * probabilities. - */ - matrix3d_d post_state_prob ; - /*! - * \brief the total prob per class. - */ - vector_d post_class_prob ; - /*! - * \brief the sum per row (data point) of post_prob. - */ - vector_d post_prob_rowsum ; - /*! - * \brief the sum per column (class) of post_prob. - */ - vector_d post_prob_colsum ; - /*! - * \brief the total of post_prob. - */ - double post_prob_tot ; - - /*! - * \brief the read data and their models. - */ - std::list read_layer_list ; - /*! - * \brief the sequence data and their models. - */ - std::list sequence_layer_list ; - /*! - * \brief the threads. 
- */ - ThreadPool* threads ; - -} ; - -#endif // EMENGINE_HPP diff --git a/src/Clustering/EMJoint.cpp b/src/Clustering/EMJoint.cpp new file mode 100644 index 0000000..d26385a --- /dev/null +++ b/src/Clustering/EMJoint.cpp @@ -0,0 +1,435 @@ + +#include + +#include +#include +#include // std::promise, std::future +#include // std::pair, std::move() +#include // std::bind(), std::ref() + +#include +#include +#include +#include +#include +#include +#include // getRandomNumberGenerator() +#include // ConsoleProgressBar + + +template +std::ostream& operator << (std::ostream& stream, + const std::vector& v) +{ for(const auto& t : v) + { stream << t << " " ; } + return stream ; +} + +EMJoint::EMJoint(const std::vector>& read_matrices, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + const std::string& seed, + size_t n_threads) + : EMBase(read_matrices[0].get_nrow(), + read_matrices[0].get_ncol(), + n_class, + n_iter, + n_shift, + flip, + n_threads), + n_layer(read_matrices.size()), + loglikelihood_layer(n_layer, + Matrix4D(this->n_row, + this->n_class, + this->n_shift, + this->n_flip, + 0.)), + loglikelihood_max(this->n_layer, + vector_d(this->n_row, 0.)), + read_layers(), + seq_layer(nullptr) + +{ + // check data matrices and their dimensions + if(this->n_layer == 0) + { throw std::invalid_argument("Error! No data layer given!") ; } + for(const auto& matrix : read_matrices) + { if(matrix.get_nrow() != this->n_row) + { char msg[4096] ; + sprintf(msg, "Error! Read layers have variable row numbers " + "(%zu and %zu)!", + matrix.get_nrow(), this->n_row) ; + throw std::invalid_argument(msg) ; + } + else if(matrix.get_ncol() != this->n_col) + { char msg[4096] ; + sprintf(msg, "Error! 
Read layers have variable column numbers " + "(%zu and %zu)!", + matrix.get_ncol(), this->n_col) ; + throw std::invalid_argument(msg) ; + } + } + + // initialise post prob randomly + // getRandomGenerator(seed) ; + this->set_post_prob_random(seed) ; + + // data and models + // create read layer and initialise the models from the post prob + for(const auto& matrix : read_matrices) + { // create the layer + this->read_layers.push_back(new ReadLayer(matrix, + this->n_class, + this->n_shift, + this->flip, + this->threads)) ; + this->read_layers.back()->update_model(this->post_prob, + this->threads) ; + } +} + +EMJoint::EMJoint(const std::vector>& read_matrices, + const Matrix2D& seq_matrix, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + const std::string& seed, + size_t n_threads) + : EMBase(read_matrices[0].get_nrow(), + read_matrices[0].get_ncol(), + n_class, + n_iter, + n_shift, + flip, + n_threads), + n_layer(read_matrices.size()+1), + loglikelihood_layer(this->n_layer, + Matrix4D(this->n_row, + this->n_class, + this->n_shift, + this->n_flip, + 0.)), + loglikelihood_max(this->n_layer, + vector_d(this->n_row, 0.)), + read_layers(), + seq_layer(nullptr) +{ // check data matrices and their dimensions + for(const auto& matrix : read_matrices) + { if(matrix.get_nrow() != this->n_row) + { char msg[4096] ; + sprintf(msg, "Error! A read matrix row number is different than expected " + "(%zu instead of %zu)!", + matrix.get_nrow(), this->n_row) ; + throw std::invalid_argument(msg) ; + } + else if(matrix.get_ncol() != this->n_col) + { char msg[4096] ; + sprintf(msg, "Error! A read matrix column number is different than expected " + "(%zu instead of %zu)!", + matrix.get_ncol(), this->n_col) ; + throw std::invalid_argument(msg) ; + } + } + if(seq_matrix.get_nrow() != this->n_row) + { char msg[4096] ; + sprintf(msg, "Error! 
A sequence matrix row number is different than expected " + "(%zu instead of %zu)!", + seq_matrix.get_nrow(), this->n_row) ; + throw std::invalid_argument(msg) ; + } + else if(seq_matrix.get_ncol() != this->n_col) + { char msg[4096] ; + sprintf(msg, "Error! A sequence matrix column number is different than expected " + "(%zu instead of %zu)!", + seq_matrix.get_ncol(), this->n_col) ; + throw std::invalid_argument(msg) ; + } + + // initialise post prob randomly + // getRandomGenerator(seed) ; + this->set_post_prob_random(seed) ; + + // data and models + // create read layer and initialise the models from the post prob + for(const auto& matrix : read_matrices) + { // create the layer + this->read_layers.push_back(new ReadLayer(matrix, + this->n_class, + this->n_shift, + this->flip, + this->threads)) ; + this->read_layers.back()->update_model(this->post_prob, + this->threads) ; + } + // create sequence layer and initialise the models from the post prob + this->seq_layer = new SequenceLayer(seq_matrix, + this->n_class, + this->n_shift, + this->flip, + false) ; + this->seq_layer->update_model(this->post_prob, + this->threads) ; +} + +EMJoint::~EMJoint() +{ // join the threads in case + // deleted by EMBase destructor + this->threads->join() ; + + // read data and models + for(auto& ptr : this->read_layers) + { if(ptr != nullptr) + { delete ptr ; + ptr = nullptr ; + } + } + // sequence data and models + if(seq_layer != nullptr) + { delete seq_layer ; + seq_layer = nullptr ; + } +} + +std::vector> EMJoint::get_read_models() const +{ std::vector> models ; + for(const auto& ptr : this->read_layers) + { models.push_back(ptr->get_model()) ; } + return models ; +} + +Matrix3D EMJoint::get_sequence_models() const +{ return this->seq_layer->get_model() ; } + +EMJoint::exit_codes EMJoint::classify() +{ + size_t bar_update_n = this->n_iter ; + ConsoleProgressBar bar(std::cerr, bar_update_n, 60, "classifying") ; + + // optimize the partition + for(size_t n_iter=0; n_itern_iter; 
n_iter++) + { // E-step + this->compute_loglikelihood() ; + this->compute_post_prob() ; + // M-step + this->compute_class_prob() ; + this->update_models() ; + this->center_post_state_prob() ; + bar.update() ; + } + bar.update() ; std::cerr << std::endl ; + return EMJoint::exit_codes::ITER_MAX ; +} + +void EMJoint::compute_loglikelihood() +{ // compute the loglikelihood for each layer + size_t i = 0 ; + for(auto& ptr : this->read_layers) + { ptr->compute_loglikelihoods(this->loglikelihood_layer[i], + this->loglikelihood_max[i], + this->threads) ; + i++ ; + } + this->seq_layer->compute_loglikelihoods(this->loglikelihood_layer[i], + this->loglikelihood_max[i], + this->threads) ; + i++ ; + /* + // sum the likelihood for each state, over all layers + // this is the "joint likelihood" + for(size_t i=0; in_row; i++) + { for(size_t j=0; jn_class; j++) + { for(size_t k=0; kn_shift; k++) + { for(size_t l=0; ln_flip; l++) + { + // reset + this->loglikelihood(i,j,k,l) = 0. ; + // sum + for(size_t m=0; mn_layer; m++) + { this->loglikelihood(i,j,k,l) += + (this->loglikelihood_layer[m](i,j,k,l) - + this->loglikelihood_max[m][i]) ; + } + } + } + } + } + */ + + // sum the likelihood for each state, over all layers + // and rescale the values + // don't parallelize + if(this->threads == nullptr) + { std::promise promise ; + std::future future = promise.get_future() ; + this->compute_loglikelihood_routine(0, + this->n_row, + promise) ; + future.get() ; + } + // parallelize + else + { size_t n_threads = this->threads->getNThread() ; + + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row,n_threads) ; + + // get promises and futures + std::vector> promises(n_threads) ; + std::vector> futures(n_threads) ; + for(size_t i=0; ithreads->addJob(std::move( + std::bind(&EMJoint::compute_loglikelihood_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // wait until all threads are done working + 
for(auto& future : futures) + { future.get() ; } + // -------------------------- threads stop --------------------------- + } +} + +void EMJoint::compute_loglikelihood_routine(size_t from, + size_t to, + std::promise& done) +{ + // limite value range + for(size_t i=from; in_class; j++) + { for(size_t k=0; kn_shift; k++) + { for(size_t l=0; ln_flip; l++) + { + // reset + this->loglikelihood(i,j,k,l) = 0. ; + // sum + for(size_t m=0; mn_layer; m++) + { this->loglikelihood(i,j,k,l) += + (this->loglikelihood_layer[m](i,j,k,l) - + this->loglikelihood_max[m][i]) ; + } + } + } + } + } + done.set_value(true) ; +} + +void EMJoint::compute_post_prob() +{ // don't parallelize + if(this->threads == nullptr) + { std::promise promise ; + std::future future = promise.get_future() ; + this->compute_post_prob_routine(0, this->n_row, promise) ; + // compute the sum of post prob and the per class sum of post prob + // from the partial results computed on each slice + this->post_prob_tot = 0. ; + this->post_prob_colsum = future.get() ; + for(const auto& prob : this->post_prob_colsum) + { this->post_prob_tot += prob ; } + } + // parallelize + else + { size_t n_threads = this->threads->getNThread() ; + + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row,n_threads) ; + + // get promises and futures + // the function run by the threads will compute + // the partial sum per class of post_prob for the given slice + // this should be used to compute the complete sum of post_prob + // and the complete sum per class of post_prob + std::vector> promises(n_threads) ; + std::vector> futures(n_threads) ; + for(size_t i=0; ithreads->addJob(std::move( + std::bind(&EMJoint::compute_post_prob_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // wait until all threads are done working + // compute the sum of post prob and the per class sum of post prob + // from the partial results computed on each 
slice + this->post_prob_tot = 0. ; + this->post_prob_colsum = vector_d(this->n_class, 0.) ; + for(auto& future : futures) + { auto probs = future.get() ; + for(size_t i=0; in_class; i++) + { double prob = probs[i] ; + this->post_prob_colsum[i] += prob ; + this->post_prob_tot += prob ; + } + } + // -------------------------- threads stop --------------------------- + } +} + +void EMJoint::compute_post_prob_routine(size_t from, + size_t to, + std::promise& post_prob_colsum) +{ vector_d colsums(this->n_class, 0.) ; + + // post prob + for(size_t i=from; ipost_prob_rowsum[i] = 0. ; + for(size_t n_class=0; n_classn_class; n_class++) + { for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t n_flip=0; n_flipn_flip; n_flip++) + { + double p = exp(this->loglikelihood(i,n_class,n_shift,n_flip)) * + this->post_state_prob(n_class,n_shift,n_flip) ; + this->post_prob(i,n_class,n_shift,n_flip) = p ; + this->post_prob_rowsum[i] += p ; + } + } + } + // normalize + for(size_t n_class=0; n_classn_class; n_class++) + { for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t n_flip=0; n_flipn_flip; n_flip++) + { + double p = std::max(this->post_prob(i,n_class,n_shift,n_flip) / + this->post_prob_rowsum[i], + ReadLayer::p_min) ; + this->post_prob(i,n_class,n_shift,n_flip) = p ; + colsums[n_class] += p ; + } + } + } + } + post_prob_colsum.set_value(colsums) ; +} + +void EMJoint::update_models() +{ // read data and models + for(auto& ptr : this->read_layers) + { ptr->update_model(this->post_prob, + this->post_prob_colsum, + this->threads) ; + } + // sequence data and models + this->seq_layer->update_model(this->post_prob, + this->threads) ; +} diff --git a/src/Clustering/EMJoint.hpp b/src/Clustering/EMJoint.hpp new file mode 100644 index 0000000..adb1f0e --- /dev/null +++ b/src/Clustering/EMJoint.hpp @@ -0,0 +1,198 @@ +#ifndef EMJOINT_HPP +#define EMJOINT_HPP + + +#include + +#include +#include + +#include +#include +#include +#include +#include + + +typedef std::vector 
vector_d ; + +class EMJoint : public EMBase +{ + public: + + /*! + * \brief Constructs an object to partition the + * region according to all the given read densities + * with the given shifting and flipping freedom. + * \param read_matrices a vector containing all + * the different data densities (ChIP-seq or related + * signal) for the regions of interest. + * \param seq_matrix a matrix containing the DNA + * sequences for the regions of interest. + * \param n_class the number of region classes + * to search. + * \param n_iter the number of optimization iterations. + * \param n_shift the number of shift states allowed. + * \param flip whether flipping is allowed. + * \param seed a seed to initialise the random number + * generator. + * \param n_threads the number of parallel threads + * to run the computations. 0 means no parallel + * computing, everything is run on the main thread. + */ + EMJoint(const std::vector>& read_matrices, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + const std::string& seed="", + size_t n_threads=0) ; + + /*! + * \brief Constructs an object to partition the + * region according to all the given read densities + * and region sequences with the given shifting and + * flipping freedom. + * \param read_matrices a vector containing all + * the different data densities (ChIP-seq or related + * signal) for the regions of interest. + * \param seq_matrix a matrix containing the DNA + * sequences for the regions of interest. + * \param n_class the number of region classes + * to search. + * \param n_iter the number of optimization iterations. + * \param n_shift the number of shift states allowed. + * \param flip whether flipping is allowed. + * \param seed a seed to initialise the random number + * generator. + * \param n_threads the number of parallel threads + * to run the computations. 0 means no parallel + * computing, everything is run on the main thread. 
+ */ + EMJoint(const std::vector>& read_matrices, + const Matrix2D& seq_matrix, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + const std::string& seed="", + size_t n_threads=0) ; + + EMJoint(const EMJoint& other) = delete ; + + /*! + * \brief Destructor. + */ + virtual ~EMJoint() override ; + + /*! + * \brief Returns all layer read models. + * The models are in the same order + * as the data were given to the + * constructor. + * \return a vector containing the + * models. + */ + std::vector> get_read_models() const ; + + /*! + * \brief Returns the sequence models. + * \return a vector containing the + * models. + */ + Matrix3D get_sequence_models() const ; + + /*! + * \brief Runs the sequence model optimization and + * the data classification. + * \return a code indicating how the optimization + * ended. + */ + virtual EMJoint::exit_codes classify() override ; + + private: + + /*! + * \brief Computes the data log likelihood given the + * current models, for all layers and the joint + * likelihood for each state (the sum of the layer + * likelihoods for all layers, for a given state). + */ + virtual void compute_loglikelihood() override ; + + /*! + * \brief This is a routine of compute_loglikelihood() that + * computes the joint loglikelihood by summing the + * individual loglikelihood obtained from each data layer. + * At the same time, this method rescales the loglikelihood + * values by substacting to each value the maximum + * loglikelihood value found in the same data row, + * for each layer. + * \param from the index of the first row + * in the data to consider. + * \param to the index of the past last row + * in the data to consider. + * \param done a promise to fill when the method + * is done. + */ + void compute_loglikelihood_routine(size_t from, + size_t to, + std::promise& done) ; + + /*! + * \brief Computes the data posterior probabilties. + */ + virtual void compute_post_prob() override ; + + /*! 
+ * \brief The routine that effectively computes + * the posterior probabilties. + * \param from the index of the first row + * in the data to consider. + * \param to the index of the past last row + * in the data to consider. + * \param done the partial column (over the classes) + * sum of posterior probabilities. If several routines + * are running together, the colsums are retrieved by + * summing up the vectors together. + */ + void compute_post_prob_routine(size_t from, + size_t to, + std::promise& post_prob_colsum) ; + + /*! + * \brief Update the data models for all layers, given + * the current posterior and class probabilities. + */ + virtual void update_models() override ; + + /*! + * \brief the number of data layers. + */ + size_t n_layer ; + /*! + * \brief the log likelihood buffers for each individual + * layer (one element per layer). + */ + std::vector> loglikelihood_layer ; + /*! + * \brief the max loglikelihood value for + * each each data layer (1st dimension) + * and each data row of the given layer + * (2nd dimension). + */ + std::vector loglikelihood_max ; + /*! + * \brief A vector containing the pointers + * to the objects managing all the read + * layer data and models. + */ + std::vector read_layers ; + /*! + * \brief A pointer to the object managing + * the data and their model. 
+ */ + SequenceLayer* seq_layer ; +} ; + +#endif // EMJOINT_HPP diff --git a/src/Clustering/EMRead.cpp b/src/Clustering/EMRead.cpp new file mode 100644 index 0000000..872cd54 --- /dev/null +++ b/src/Clustering/EMRead.cpp @@ -0,0 +1,265 @@ +#include + +#include +#include +#include // std::promise, std::future +#include // std::pair, std::move() +#include // std::bind(), std::ref() +#include // exp() + +#include // ReadLayer +#include // getRandomNumberGenerator() +#include // ConsoleProgressBar +#include // ThreadPool + + + +EMRead::EMRead(const Matrix2D& read_matrix, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + const std::string& seed, + size_t n_threads) + : EMBase(read_matrix.get_nrow(), + read_matrix.get_ncol(), + n_class, + n_iter, + n_shift, + flip, + n_threads), + loglikelihood_max(n_row, 0.), + read_layer(nullptr) +{ this->loglikelihood_max = vector_d(n_row, 0.) ; + + // initialise post prob randomly + this->set_post_prob_random(seed) ; + // data and models + this->read_layer = new ReadLayer(read_matrix, + this->n_class, + this->n_shift, + flip, + this->threads) ; + // intialise the models with the post prob + this->read_layer->update_model(this->post_prob, + this->threads) ; +} + +EMRead::~EMRead() +{ if(this->read_layer == nullptr) + { delete this->read_layer ; + this->read_layer = nullptr ; + } +} + +Matrix3D EMRead::get_read_models() const +{ return read_layer->get_model() ; } + +EMRead::exit_codes EMRead::classify() +{ size_t bar_update_n = this->n_iter ; + ConsoleProgressBar bar(std::cerr, bar_update_n, 60, "classifying") ; + + // optimize the partition + for(size_t n_iter=0; n_itern_iter; n_iter++) + { // E-step + this->compute_loglikelihood() ; + this->compute_post_prob() ; + // M-step + this->compute_class_prob() ; + this->update_models() ; + this->center_post_state_prob() ; + + bar.update() ; + } + bar.update() ; std::cerr << std::endl ; + return EMRead::exit_codes::ITER_MAX ; +} + +void EMRead::compute_loglikelihood() +{ // 
compute the loglikelihood + this->read_layer->compute_loglikelihoods(this->loglikelihood, + this->loglikelihood_max, + this->threads) ; + + /* + // rescale the values + for(size_t i=0; in_row; i++) + { for(size_t j=0; jn_class; j++) + { for(size_t k=0; kn_shift; k++) + { for(size_t l=0; ln_flip; l++) + { this->loglikelihood(i,j,k,l) = + (this->loglikelihood(i,j,k,l) - + this->loglikelihood_max[i]) ; + } + } + } + } + */ + + // rescale the values + // don't parallelize + if(this->threads == nullptr) + { std::promise promise ; + std::future future = promise.get_future() ; + this->compute_loglikelihood_routine(0, + this->n_row, + promise) ; + future.get() ; + } + // parallelize + else + { size_t n_threads = this->threads->getNThread() ; + + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row,n_threads) ; + + // get promises and futures + std::vector> promises(n_threads) ; + std::vector> futures(n_threads) ; + for(size_t i=0; ithreads->addJob(std::move( + std::bind(&EMRead::compute_loglikelihood_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // wait until all threads are done working + for(auto& future : futures) + { future.get() ; } + // -------------------------- threads stop --------------------------- + } +} + +void EMRead::compute_loglikelihood_routine(size_t from, + size_t to, + std::promise& done) +{ + // rescale the values + for(size_t i=from; in_class; j++) + { for(size_t k=0; kn_shift; k++) + { for(size_t l=0; ln_flip; l++) + { this->loglikelihood(i,j,k,l) = + (this->loglikelihood(i,j,k,l) - + this->loglikelihood_max[i]) ; + } + } + } + } + done.set_value(true) ; +} + +void EMRead::compute_post_prob() +{ // don't parallelize + if(this->threads == nullptr) + { std::promise promise ; + std::future future = promise.get_future() ; + this->compute_post_prob_routine(0, this->n_row, promise) ; + // compute the sum of post prob and the per class sum of post prob + // 
from the partial results computed on each slice + this->post_prob_tot = 0. ; + this->post_prob_colsum = future.get() ; + for(const auto& prob : this->post_prob_colsum) + { this->post_prob_tot += prob ; } + } + // parallelize + else + { size_t n_threads = this->threads->getNThread() ; + + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row,n_threads) ; + + // get promises and futures + // the function run by the threads will compute + // the partial sum per class of post_prob for the given slice + // this should be used to compute the complete sum of post_prob + // and the complete sum per class of post_prob + std::vector> promises(n_threads) ; + std::vector> futures(n_threads) ; + for(size_t i=0; ithreads->addJob(std::move( + std::bind(&EMRead::compute_post_prob_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // wait until all threads are done working + // compute the sum of post prob and the per class sum of post prob + // from the partial results computed on each slice + this->post_prob_tot = 0. ; + this->post_prob_colsum = vector_d(this->n_class, 0.) ; + for(auto& future : futures) + { auto probs = future.get() ; + for(size_t i=0; in_class; i++) + { double prob = probs[i] ; + this->post_prob_colsum[i] += prob ; + this->post_prob_tot += prob ; + } + } + // -------------------------- threads stop --------------------------- + } +} + + +void EMRead::compute_post_prob_routine(size_t from, + size_t to, + std::promise& post_prob_colsum) +{ vector_d colsums(this->n_class, 0.) ; + + // reset grand total + // this->post_prob_tot = 0 ; + // this->post_prob_colsum = vector_d(n_class, 0) ; + + // post prob + for(size_t i=from; ipost_prob_rowsum[i] = 0. 
; + for(size_t n_class=0; n_classn_class; n_class++) + { for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t n_flip=0; n_flipn_flip; n_flip++) + { + double p = exp(this->loglikelihood(i,n_class,n_shift,n_flip)) * + this->post_state_prob(n_class,n_shift,n_flip) ; + this->post_prob(i,n_class,n_shift,n_flip) = p ; + this->post_prob_rowsum[i] += p ; + } + } + } + // normalize + for(size_t n_class=0; n_classn_class; n_class++) + { for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t n_flip=0; n_flipn_flip; n_flip++) + { + double p = std::max(this->post_prob(i,n_class,n_shift,n_flip) / + this->post_prob_rowsum[i], + ReadLayer::p_min) ; + this->post_prob(i,n_class,n_shift,n_flip) = p ; + colsums[n_class] += p ; + } + } + } + } + post_prob_colsum.set_value(colsums) ; +} + +void EMRead::update_models() +{ this->read_layer->update_model(this->post_prob, + this->post_prob_colsum, + this->threads) ; +} diff --git a/src/Clustering/EMRead.hpp b/src/Clustering/EMRead.hpp new file mode 100644 index 0000000..da351b4 --- /dev/null +++ b/src/Clustering/EMRead.hpp @@ -0,0 +1,133 @@ +#ifndef EMREAD_HPP +#define EMREAD_HPP + +#include + +#include +#include +#include // std::promise + +#include +#include + + +typedef std::vector vector_d ; + + +class EMRead : public EMBase +{ public: + /*! + * \brief Constructs an object to partition the + * region (rows) according to the shape of the signal + * with the given shifting and flipping freedom. + * \param read_matrix a matrix containing the read + * densitiy (ChIP-seq or related signal) for the + * regions of interest. + * \param n_class the number of region classes + * to search. + * \param n_iter the number of optimization iterations. + * \param n_shift the number of shift states allowed. + * \param flip whether flipping is allowed. + * \param seed a seed to initialise the random number + * generator. + * \param n_threads the number of parallel threads + * to run the computations. 
0 means no parallel + * computing, everything is run on the main thread. + */ + EMRead(const Matrix2D& read_matrix, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + const std::string& seed="", + size_t n_threads=0) ; + + EMRead(const EMRead& other) = delete ; + + /*! + * \brief Destructor. + */ + virtual ~EMRead() override ; + + /*! + * \brief Returns the class read signal model. + * \return the class read signal model. + */ + Matrix3D get_read_models() const ; + + /*! + * \brief Runs the read signal model optimization and + * the data classification. + * \return a code indicating how the optimization + * ended. + */ + virtual EMRead::exit_codes classify() override ; + + private: + + /*! + * \brief Computes the data log likelihood given the + * current models, for all layers and the joint + * likelihood for each state (the sum of the layer + * likelihoods for all layers, for a given state). + */ + virtual void compute_loglikelihood() override ; + + /*! + * \brief This is a routine of compute_loglikelihood(). + * This method rescales the loglikelihood values by + * substacting to each value the maximum loglikelihood + * value found in the same data row. + * This method + * \param from the index of the first row + * in the data to consider. + * \param to the index of the past last row + * in the data to consider. + * \param done a promise to fill when the method + * is done. + */ + void compute_loglikelihood_routine(size_t from, + size_t to, + std::promise& done) ; + + /*! + * \brief Computes the data posterior probabilties. + */ + virtual void compute_post_prob() override ; + + /*! + * \brief The routine that effectively computes + * the posterior probabilties. + * \param from the index of the first row + * in the data to consider. + * \param to the index of the past last row + * in the data to consider. + * \param done the partial column (over the classes) + * sum of posterior probabilities. 
If several routines + * are running together, the colsums are retrieved by + * summing up the vectors together. + */ + void compute_post_prob_routine(size_t from, + size_t to, + std::promise& post_prob_colsum) ; + + /*! + * \brief Update the data models for all layers, given + * the current posterior and class probabilities. + */ + virtual void update_models() override ; + + /*! + * \brief the max loglikelihood value for + * each data row. + */ + std::vector loglikelihood_max ; + /*! + * \brief A pointer to the object managing + * the data and their model. + */ + ReadLayer* read_layer ; + +} ; + +#endif // EMREAD_HPP diff --git a/src/Clustering/EMSequence.cpp b/src/Clustering/EMSequence.cpp new file mode 100644 index 0000000..0d75793 --- /dev/null +++ b/src/Clustering/EMSequence.cpp @@ -0,0 +1,310 @@ +#include + +#include +#include +#include // std::promise, std::future +#include // std::pair, std::move() +#include // std::bind(), std::ref() +#include // exp() + +#include // SequenceLayer +#include // getRandomNumberGenerator() +#include // ConsoleProgressBar +#include // ThreadPool +#include // dna::base_composition() + +template +std::ostream& operator << (std::ostream& stream, const std::vector& v) +{ for(const auto& x : v) + { stream << x << " " ; } + return stream ; +} + + +EMSequence::EMSequence(const Matrix2D& seq_matrix, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + bool bckg_class, + const std::string& seed, + size_t n_threads) + : EMBase(seq_matrix.get_nrow(), + seq_matrix.get_ncol(), + n_class, + n_iter, + n_shift, + flip, + n_threads), + loglikelihood_max(n_row, 0.), + seq_layer(nullptr) +{ this->loglikelihood_max = vector_d(n_row, 0.) 
; + + // initialise post prob randomly + // getRandomGenerator(seed) ; + this->set_post_prob_random(seed) ; + + // data and models + this->seq_layer = new SequenceLayer(seq_matrix, + this->n_class, + this->n_shift, + this->flip, + bckg_class) ; + + // intialise the models with the post prob + this->seq_layer->update_model(this->post_prob, + this->threads) ; + // overwrite last class as background class + if(bckg_class) + { // sequence composition + std::vector base_comp = + dna::base_composition(seq_matrix, + flip) ; + // create a motif + Matrix2D bckg_motif(4, + seq_matrix.get_ncol()-this->n_shift+1) ; + for(size_t i=0; iseq_layer->set_class(this->n_class-1, + bckg_motif) ; + } +} + +EMSequence::EMSequence(const Matrix2D& seq_matrix, + const Matrix3D& motifs, + size_t n_iter, + bool flip, + bool bckg_class, + size_t n_threads) + : EMBase(seq_matrix.get_nrow(), + seq_matrix.get_ncol(), + motifs.get_dim()[0], + n_iter, + seq_matrix.get_ncol() - motifs.get_dim()[1] + 1, + flip, + n_threads), + loglikelihood_max(n_row, 0.), + seq_layer(nullptr) +{ + + this->loglikelihood_max = vector_d(n_row, 0.) 
; + + // initialise post prob randomly + // getRandomGenerator(seed) ; + // this->set_post_prob_random(seed) ; + + // data and models + this->seq_layer = new SequenceLayer(seq_matrix, + motifs, + this->flip, + bckg_class) ; + + // intialise the class prob uniformly + this->set_state_prob_uniform() ; +} + + +EMSequence::~EMSequence() +{ if(this->seq_layer == nullptr) + { delete this->seq_layer ; + this->seq_layer = nullptr ; + } +} + +Matrix3D EMSequence::get_sequence_models() const +{ return seq_layer->get_model() ; } + +EMSequence::exit_codes EMSequence::classify() +{ + size_t bar_update_n = this->n_iter ; + ConsoleProgressBar bar(std::cerr, bar_update_n, 60, "classifying") ; + + // optimize the partition + for(size_t n_iter=0; n_itern_iter; n_iter++) + { // E-step + this->compute_loglikelihood() ; + this->compute_post_prob() ; + // M-step + this->compute_class_prob() ; + this->update_models() ; + this->center_post_state_prob() ; + bar.update() ; + } + bar.update() ; std::cerr << std::endl ; + return EMSequence::exit_codes::ITER_MAX ; +} + +void EMSequence::compute_loglikelihood() +{ // compute the loglikelihood + this->seq_layer->compute_loglikelihoods(this->loglikelihood, + this->loglikelihood_max, + this->threads) ; + // rescale the values + // don't parallelize + if(this->threads == nullptr) + { std::promise promise ; + std::future future = promise.get_future() ; + this->compute_loglikelihood_routine(0, + this->n_row, + promise) ; + future.get() ; + } + // parallelize + else + { size_t n_threads = this->threads->getNThread() ; + + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row,n_threads) ; + + // get promises and futures + std::vector> promises(n_threads) ; + std::vector> futures(n_threads) ; + for(size_t i=0; ithreads->addJob(std::move( + std::bind(&EMSequence::compute_loglikelihood_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // wait until all threads 
are done working + for(auto& future : futures) + { future.get() ; } + // -------------------------- threads stop --------------------------- + } +} + +void EMSequence::compute_loglikelihood_routine(size_t from, + size_t to, + std::promise& done) +{ + // rescale the values + for(size_t i=from; in_class; j++) + { for(size_t k=0; kn_shift; k++) + { for(size_t l=0; ln_flip; l++) + { this->loglikelihood(i,j,k,l) = + (this->loglikelihood(i,j,k,l) - + this->loglikelihood_max[i]) ; + } + } + } + } + done.set_value(true) ; +} + +void EMSequence::compute_post_prob() +{ // don't parallelize + if(this->threads == nullptr) + { std::promise promise ; + std::future future = promise.get_future() ; + this->compute_post_prob_routine(0, this->n_row, promise) ; + // compute the sum of post prob and the per class sum of post prob + // from the partial results computed on each slice + this->post_prob_tot = 0. ; + this->post_prob_colsum = future.get() ; + for(const auto& prob : this->post_prob_colsum) + { this->post_prob_tot += prob ; } + } + // parallelize + else + { size_t n_threads = this->threads->getNThread() ; + + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row,n_threads) ; + + // get promises and futures + // the function run by the threads will compute + // the partial sum per class of post_prob for the given slice + // this should be used to compute the complete sum of post_prob + // and the complete sum per class of post_prob + std::vector> promises(n_threads) ; + std::vector> futures(n_threads) ; + for(size_t i=0; ithreads->addJob(std::move( + std::bind(&EMSequence::compute_post_prob_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // wait until all threads are done working + // compute the sum of post prob and the per class sum of post prob + // from the partial results computed on each slice + this->post_prob_tot = 0. 
; + this->post_prob_colsum = vector_d(this->n_class, 0.) ; + for(auto& future : futures) + { auto probs = future.get() ; + for(size_t i=0; in_class; i++) + { double prob = probs[i] ; + this->post_prob_colsum[i] += prob ; + this->post_prob_tot += prob ; + } + } + // -------------------------- threads stop --------------------------- + } +} + + +void EMSequence::compute_post_prob_routine(size_t from, + size_t to, + std::promise& post_prob_colsum) +{ vector_d colsums(this->n_class, 0.) ; + + // reset grand total + // this->post_prob_tot = 0 ; + // this->post_prob_colsum = vector_d(n_class, 0) ; + + // post prob + for(size_t i=from; ipost_prob_rowsum[i] = 0. ; + for(size_t n_class=0; n_classn_class; n_class++) + { for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t n_flip=0; n_flipn_flip; n_flip++) + { + double p = exp(this->loglikelihood(i,n_class,n_shift,n_flip)) * + this->post_state_prob(n_class,n_shift,n_flip) ; + this->post_prob(i,n_class,n_shift,n_flip) = p ; + this->post_prob_rowsum[i] += p ; + } + } + } + // normalize + for(size_t n_class=0; n_classn_class; n_class++) + { for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t n_flip=0; n_flipn_flip; n_flip++) + { + double p = std::max(this->post_prob(i,n_class,n_shift,n_flip) / + this->post_prob_rowsum[i], + SequenceLayer::p_min) ; + this->post_prob(i,n_class,n_shift,n_flip) = p ; + colsums[n_class] += p ; + } + } + } + } + post_prob_colsum.set_value(colsums) ; +} + +void EMSequence::update_models() +{ this->seq_layer->update_model(this->post_prob, + this->threads) ; +} diff --git a/src/Clustering/EMSequence.hpp b/src/Clustering/EMSequence.hpp new file mode 100644 index 0000000..fdeef47 --- /dev/null +++ b/src/Clustering/EMSequence.hpp @@ -0,0 +1,173 @@ +#ifndef EMSEQUENCE_HPP +#define EMSEQUENCE_HPP + +#include + +#include +#include +#include // std::promise + +#include +#include + + +typedef std::vector vector_d ; + + +class EMSequence : public EMBase +{ public: + /*! 
+ * \brief Constructs an object to partition the + * given sequences (rows) according to their motif + * content. + * The sequences models are initialised randomly. + * \param sequence_matrix a matrix containing the sequences + * of interest. + * \param n_class the number of region classes + * to search. + * \param n_iter the number of optimization iterations. + * \param n_shift the number of shift states allowed. + * \param flip whether flipping is allowed. + * \param bckg_class the last class is used to model the background + * by setting all its parameters, at all positions, to the + * background base probabilties. Since the background is constant, + * this class will never be updated. + * \param seed a seed to initialise the random number + * generator. + * \param n_threads the number of parallel threads + * to run the computations. 0 means no parallel + * computing, everything is run on the main thread. + */ + EMSequence(const Matrix2D& sequence_matrix, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + bool bckg_class, + const std::string& seed="", + size_t n_threads=0) ; + + /*! + * \brief Constructs an object to partition the + * given sequences (rows) according to their motif + * content. + * The sequences class models are initialised using + * the given motifs. The class probabilities are + * initialised uniformlly. + * The shifting freedom is set to (data number + * of columns) - (the model 2nd dimension) + * + 1. + * \param sequence_matrix a matrix containing the sequences + * of interest. + * \param motifs a matrix containing the different initial + * class models with the following dimensions : + * dim1 the number of classes + * dim2 the model length + * dim3 4 for A,C,G,T + * \param n_class the number of region classes + * to search. + * \param n_iter the number of optimization iterations. + * \param flip whether flipping is allowed. 
+ * \param bckg_class indicates that the last class in the + * given motifs is used to model the background and it + * should never be updated. + * \param n_threads the number of parallel threads + * to run the computations. 0 means no parallel + * computing, everything is run on the main thread. + */ + EMSequence(const Matrix2D& sequence_matrix, + const Matrix3D& motifs, + size_t n_iter, + bool flip, + bool bckg_class, + size_t n_threads=0) ; + + EMSequence(const EMSequence& other) = delete ; + + /*! + * \brief Destructor. + */ + virtual ~EMSequence() override ; + + /*! + * \brief Returns the class sequence model. + * \return the class sequence model. + */ + Matrix3D get_sequence_models() const ; + + /*! + * \brief Runs the sequence model optimization and + * the data classification. + * \return a code indicating how the optimization + * ended. + */ + virtual EMSequence::exit_codes classify() override ; + + private: + + /*! + * \brief Computes the data log likelihood given the + * current models, for all layers and the joint + * likelihood for each state (the sum of the layer + * likelihoods for all layers, for a given state). + */ + virtual void compute_loglikelihood() override ; + + /*! + * \brief This is a routine of compute_loglikelihood(). + * This method rescales the loglikelihood values by + * substacting to each value the maximum loglikelihood + * value found in the same data row. + * This method + * \param from the index of the first row + * in the data to consider. + * \param to the index of the past last row + * in the data to consider. + * \param done a promise to fill when the method + * is done. + */ + void compute_loglikelihood_routine(size_t from, + size_t to, + std::promise& done) ; + + /*! + * \brief Computes the data posterior probabilties. + */ + virtual void compute_post_prob() override ; + + /*! + * \brief The routine that effectively computes + * the posterior probabilties. 
+ * \param from the index of the first row + * in the data to consider. + * \param to the index of the past last row + * in the data to consider. + * \param done the partial column (over the classes) + * sum of posterior probabilities. If several routines + * are running together, the colsums are retrieved by + * summing up the vectors together. + */ + void compute_post_prob_routine(size_t from, + size_t to, + std::promise& post_prob_colsum) ; + + /*! + * \brief Update the data models for all layers, given + * the current posterior and class probabilities. + */ + virtual void update_models() override ; + + /*! + * \brief the max loglikelihood value for + * each data row. + */ + std::vector loglikelihood_max ; + /*! + * \brief A pointer to the object managing + * the data and their model. + */ + SequenceLayer* seq_layer ; + +} ; + +#endif // EMSEQUENCE_HPP diff --git a/src/Clustering/ModelComputer.cpp b/src/Clustering/ModelComputer.cpp index 20e6cd5..e9b5acd 100644 --- a/src/Clustering/ModelComputer.cpp +++ b/src/Clustering/ModelComputer.cpp @@ -1,33 +1,34 @@ #include -#include +#include +#include ModelComputer::ModelComputer() : data_layer(nullptr) {} ModelComputer::~ModelComputer() { if(this->data_layer != nullptr) { delete this->data_layer ; this->data_layer = nullptr ; } } -matrix2d_d ModelComputer::get_model() const +Matrix2D ModelComputer::get_model() const { // the model - matrix3d_d model = this->data_layer->get_model() ; - size_t n_class = model.size() ; - size_t l_model = model[0].size() ; - size_t n_categ = model[0][0].size() ; + Matrix3D model = this->data_layer->get_model() ; + size_t n_class = model.get_dim()[0] ; + size_t l_model = model.get_dim()[1] ; + size_t n_categ = model.get_dim()[2] ; // a nice representation of the model - matrix2d_d model_nice(n_class*n_categ, - vector_d(l_model)) ; + Matrix2D model_nice(n_class*n_categ, + l_model) ; for(size_t i=0; i +#include #include class ModelComputer { public: /*! * \brief Constructs an empty object. 
*/ ModelComputer() ; ModelComputer(const ModelComputer& other) = delete ; /*! * \brief Destructor. */ virtual ~ModelComputer() ; /*! * \brief Returns the data model in a nice * format. * 1st dim: the different classes and * the model categories. For instance, * a read model with 2 classes will have * class 1 and class 2 over the rows. * A sequence model with 2 classes will * have class 1 A, class 1 C, class 1 G, * class 1 T, class 2 A, class 2 C, * class 2 G and class 2 T. * 2nd dim: the model length * ___________ * | class1 | /|\ * ___|__________|_\|/ 1 (reads) or 4 (sequences) * | class2 | /|\ * |__________| \|/ 1 (reads) or 4 (sequences) * * <----------> * model length * \return the data model. */ - virtual matrix2d_d get_model() const ; + virtual Matrix2D get_model() const ; protected: /*! * \brief The data layer containing the * data and their models. */ DataLayer* data_layer ; } ; #endif // MODELCOMPUTER_HPP diff --git a/src/Clustering/ReadLayer.cpp b/src/Clustering/ReadLayer.cpp index 511b081..a974e91 100644 --- a/src/Clustering/ReadLayer.cpp +++ b/src/Clustering/ReadLayer.cpp @@ -1,569 +1,478 @@ #include #include // std::invalid_argument #include // numeric_limits #include // log(), exp(), pow() #include #include // std::promise, std::future #include // std::pair, std::move() #include // std::bind(), std::ref() + #include // beta_pmf(), poisson_pmf() #include // rand_real_uniform(), rand_int_uniform() -#include +#include +#include +#include #include +#include + +typedef std::vector vector_d ; -ReadLayer::ReadLayer(const matrix2d_i& data, + +ReadLayer::ReadLayer(const Matrix2D& data, size_t n_class, size_t n_shift, bool flip, ThreadPool* threads) : DataLayer(data, n_class, n_shift, flip), - window_means(n_row, - vector_d(n_shift, 0.)) + window_means(n_row, n_shift, 0.) 
{ this->n_category = 1 ; // initialise the empty model - this->model = matrix3d_d(this->n_class, - matrix2d_d(this->l_model, - vector_d(this->n_category, 0))) ; + this->model = Matrix3D(this->n_class, + this->l_model, + this->n_category, + 0) ; // compute window means this->compute_window_means(threads) ; } -ReadLayer::ReadLayer(const matrix2d_i& data, - const matrix3d_d& model, +ReadLayer::ReadLayer(const Matrix2D& data, + const Matrix3D& model, bool flip, ThreadPool* threads) : DataLayer(data, model, flip), - window_means(n_row, - vector_d(n_shift, 0.)) + window_means(n_row, n_shift, 0.) { // check that the model only has one category if(this->n_category > 1) { char msg[4096] ; sprintf(msg, "Error! model is expected to have length 1 on " "3rd dimension, not %zu", this->n_category) ; throw std::invalid_argument(msg) ; } // compute window means this->compute_window_means(threads) ; } ReadLayer::~ReadLayer() {} -void ReadLayer::seed_model_randomly() -{ - // get random values from a beta distribution cannot be done using boost so - // i) generate random number [0,1] x - // ii) compute f(x) where f is beta distribution - - matrix2d_d prob(this->n_row, vector_d(this->n_class, 0.)) ; - double tot_sum = 0. ; - - // sample the prob - // beta distribution parameters - double alpha = pow(this->n_row, -0.5) ; - double beta = 1. ; - for(size_t i=0; in_row; i++) - { double row_sum = 0. 
; - for(size_t j=0; jn_class; j++) - { double x = rand_real_uniform(0., 1.0) ; - // double p = std::max(ReadLayer::p_min, beta_pmf(x, alpha, beta)) ; - double p = beta_pmf(x, alpha, beta) ; - prob[i][j] = p ; - tot_sum += p ; - row_sum += p ; - } - // normalize - for(size_t j=0; jn_class; j++) - { prob[i][j] /= row_sum ; } - } - - // compute the refererences - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_class; j++) - { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_model; j_ref++, j_dat++) - { this->model[j][j_ref][0] += (this->data[i][j_dat] * prob[i][j]) ; } - } - } - // avoid 0's in the model to ensure that pmf_poisson() never - // return 0 - for(size_t i=0; in_class; i++) - { for(size_t j=0; jl_model; j++) - { for(size_t k=0; kn_category; k++) - { this->model[i][j][k] = - std::max(this->model[i][j][k], ReadLayer::p_min) ; - } - } - } -} - -void ReadLayer::seed_model_sampling() -{ std::vector choosen(this->n_row, false) ; - - for(size_t i=0; in_class; ) - { size_t index = rand_int_uniform(size_t(0), size_t(this->n_row-1)) ; - // already choose - if(choosen[index]) - { ; } - // not yet choosen as reference - else - { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_model; j_ref++, j_dat++) - { this->model[i][j_ref][0] = this->data[index][j_dat] ; } - choosen[index] = true ; - i++ ; - } - } - // avoid 0's in the model to ensure that pmf_poisson() never - // return 0 - for(size_t i=0; in_class; i++) - { for(size_t j=0; jl_model; j++) - { for(size_t k=0; kn_category; k++) - { this->model[i][j][k] = - std::max(this->model[i][j][k], ReadLayer::p_min) ; - } - } - } -} - -void ReadLayer::seed_model_toy() -{ // sample data to initialise the references - std::vector choosen(this->n_row, false) ; - - for(size_t i=0; in_class; ) - { size_t index = i ; - // already choose - if(choosen[index]) - { ; } - // not yet choosen as reference - else - { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_model; j_ref++, j_dat++) - { this->model[i][j_ref][0] = 
this->data[index][j_dat] ; } - choosen[index] = true ; - i++ ; - } - } - // avoid 0's in the model to ensure that pmf_poisson() never - // return 0 - for(size_t i=0; in_class; i++) - { for(size_t j=0; jl_model; j++) - { for(size_t k=0; kn_category; k++) - { this->model[i][j][k] = - std::max(this->model[i][j][k], ReadLayer::p_min) ; - } - } - } -} - - -void ReadLayer::compute_loglikelihoods(matrix4d_d& loglikelihood, +void ReadLayer::compute_loglikelihoods(Matrix4D& loglikelihood, vector_d& loglikelihood_max, ThreadPool* threads) const { // dimension checks this->check_loglikelihood_dim(loglikelihood) ; this->check_loglikelihood_max_dim(loglikelihood_max) ; // don't parallelize if(threads == nullptr) { std::promise promise ; std::future future = promise.get_future() ; - this->compute_loglikelihoods_routine(0, this->n_row, + this->compute_loglikelihoods_routine(0, + this->n_row, std::ref(loglikelihood), std::ref(loglikelihood_max), promise) ; future.get() ; } // parallelize else { size_t n_threads = threads->getNThread() ; // compute the slices on which each thread will work std::vector> slices = ThreadPool::split_range(0, this->n_row, n_threads) ; // get promises and futures // the function run by the threads will simply fill the promise with // "true" to indicate that they are done std::vector> promises(n_threads) ; std::vector> futures(n_threads) ; for(size_t i=0; iaddJob(std::move( std::bind(&ReadLayer::compute_loglikelihoods_routine, this, slice.first, slice.second, std::ref(loglikelihood), std::ref(loglikelihood_max), std::ref(promises[i])))) ; } // wait until all threads are done working for(auto& future : futures) { future.get() ; } // -------------------------- threads stop --------------------------- } } void ReadLayer::compute_loglikelihoods_routine(size_t from, size_t to, - matrix4d_d& loglikelihood, + Matrix4D& loglikelihood, vector_d& loglikelihood_max, std::promise& done) const { // normalize the models - matrix3d_d model_norm = this->model ; + 
Matrix3D model_norm = this->model ; for(size_t i=0; in_class; i++) { double mean = 0. ; for(size_t j=0; jl_model; j++) - { mean += model_norm[i][j][0] ; } + { mean += model_norm(i,j,0) ; } mean /= this->l_model ; for(size_t j=0; jl_model; j++) - { model_norm[i][j][0] /= mean ; } + { model_norm(i,j,0) /= mean ; } } + // compute log likelihood for(size_t i=from; i::lowest() ; for(size_t j=0; jn_class; j++) { for(size_t s_fw=0, s_rev=this->n_shift-1; s_fwn_shift; s_fw++, s_rev--) { // slice is [from_fw,to) // from_dat_fw to_dat_fw [from_dat_fw, to_dat_fw] // fw |---------->>>----------| // ----------------------------------> data // rev |----------<<<----------| [from_dat_rev, to_dat_rev] // to_dat_rev can be -1 -> int // to_dat_rev from_dat_rev // log likelihood double ll_fw = 0. ; double ll_rev = 0. ; // --------------- forward --------------- size_t from_dat_fw = s_fw ; size_t to_dat_fw = from_dat_fw + this->l_model - 1 ; // --------------- reverse --------------- size_t from_dat_rev = this->n_col - 1 - s_fw ; // size_t to_dat_rev = from_dat_rev - (this->l_model - 1) ; for(size_t j_dat_fw=from_dat_fw,j_ref_fw=0, j_dat_rev=from_dat_rev; j_dat_fwdata[i][j_dat_fw], - model_norm[j][j_ref_fw][0]* - this->window_means[i][s_fw])) ; + ll = log(poisson_pmf(this->data(i,j_dat_fw), + model_norm(j,j_ref_fw,0)* + this->window_means(i,s_fw))) ; ll_fw += ll ; // ll_fw += std::max(ll, ReadLayer::p_min_log) ; // --------------- reverse --------------- if(this->flip) - { ll = log(poisson_pmf(this->data[i][j_dat_rev], - model_norm[j][j_ref_fw][0]* - this->window_means[i][s_rev])) ; + { ll = log(poisson_pmf(this->data(i,j_dat_rev), + model_norm(j,j_ref_fw,0)* + this->window_means(i,s_rev))) ; ll_rev += ll ; // ll_rev += std::max(ll, ReadLayer::p_min_log) ; } } - loglikelihood[i][j][from_dat_fw][flip_states::FORWARD] = ll_fw ; + loglikelihood(i,j,from_dat_fw,flip_states::FORWARD) = ll_fw ; // keep track of the max per row if(ll_fw > loglikelihood_max[i]) { loglikelihood_max[i] = ll_fw 
; } if(this->flip) - { loglikelihood[i][j][from_dat_fw][flip_states::REVERSE] = ll_rev ; + { loglikelihood(i,j,from_dat_fw,flip_states::REVERSE) = ll_rev ; // keep track of the max per row if(ll_rev > loglikelihood_max[i]) { loglikelihood_max[i] = ll_rev ; } } } } } done.set_value(true) ; } - - -void ReadLayer::update_model(const matrix4d_d& posterior_prob, +void ReadLayer::update_model(const Matrix4D& posterior_prob, ThreadPool* threads) -{ // computing sum over the columns (classes) - size_t n_row = posterior_prob.size() ; - size_t n_class = posterior_prob[0].size() ; - size_t n_shift = posterior_prob[0][0].size() ; - size_t n_flip = posterior_prob[0][0][0].size() ; +{ + // computing sum over the columns (classes) + size_t n_row = posterior_prob.get_dim()[0] ; + size_t n_class = posterior_prob.get_dim()[1] ; + size_t n_shift = posterior_prob.get_dim()[2] ; + size_t n_flip = posterior_prob.get_dim()[3] ; vector_d colsum(n_class, 0.) ; for(size_t i=0; iupdate_model(posterior_prob, + colsum, + threads) ; + /* // don't parallelize if(threads == nullptr) - { std::promise promise ; - std::future future = promise.get_future() ; + { std::promise> promise ; + std::future> future = promise.get_future() ; this->update_model_routine(0, this->n_row, posterior_prob, colsum, promise) ; this->model = future.get() ; } // parallelize else { size_t n_threads = threads->getNThread() ; // compute the slices on which each thread will work std::vector> slices = ThreadPool::split_range(0, this->n_row, n_threads) ; // get promises and futures // the function run by the threads will simply fill the promise with // "true" to indicate that they are done - std::vector> promises(n_threads) ; - std::vector> futures(n_threads) ; + std::vector>> promises(n_threads) ; + std::vector>> futures(n_threads) ; for(size_t i=0; iaddJob(std::move( std::bind(&ReadLayer::update_model_routine, this, slice.first, slice.second, posterior_prob, colsum, std::ref(promises[i])))) ; } // reinitialise the model - 
this->model = matrix3d_d(this->n_class, - matrix2d_d(this->l_model, - vector_d(this->n_category, 0))) ; + this->model = Matrix3D(this->n_class, + this->l_model, + this->n_category, + 0.) ; // wait until all threads are done working // and update the model for(auto& future : futures) - { matrix3d_d model_part = future.get() ; + { Matrix3D model_part = future.get() ; for(size_t i=0; in_class; i++) { for(size_t j=0; jl_model; j++) { for(size_t k=0; kn_category; k++) - { this->model[i][j][k] += - model_part[i][j][k] ; + { this->model(i,j,k) += + model_part(i,j,k) ; } } } } // -------------------------- threads stop --------------------------- } // avoid 0's in the model to ensure that pmf_poisson() never // return 0 for(size_t i=0; in_class; i++) { for(size_t j=0; jl_model; j++) { for(size_t k=0; kn_category; k++) - { this->model[i][j][k] = - std::max(this->model[i][j][k], ReadLayer::p_min) ; + { this->model(i,j,k) = + std::max(this->model(i,j,k), ReadLayer::p_min) ; } } } + */ } -void ReadLayer::update_model(const matrix4d_d& posterior_prob, +void ReadLayer::update_model(const Matrix4D& posterior_prob, const vector_d& posterior_prob_colsum, ThreadPool* threads) { // don't parallelize if(threads == nullptr) - { std::promise promise ; - std::future future = promise.get_future() ; + { std::promise> promise ; + std::future> future = promise.get_future() ; this->update_model_routine(0, this->n_row, posterior_prob, posterior_prob_colsum, promise) ; this->model = future.get() ; } // parallelize else { size_t n_threads = threads->getNThread() ; // compute the slices on which each thread will work std::vector> slices = ThreadPool::split_range(0, this->n_row, n_threads) ; // get promises and futures // the function run by the threads will simply fill the promise with // "true" to indicate that they are done - std::vector> promises(n_threads) ; - std::vector> futures(n_threads) ; + std::vector>> promises(n_threads) ; + std::vector>> futures(n_threads) ; for(size_t i=0; 
iaddJob(std::move( std::bind(&ReadLayer::update_model_routine, this, slice.first, slice.second, - posterior_prob, - posterior_prob_colsum, + std::ref(posterior_prob), + std::ref(posterior_prob_colsum), std::ref(promises[i])))) ; } // reinitialise the model - this->model = matrix3d_d(this->n_class, - matrix2d_d(this->l_model, - vector_d(this->n_category, 0))) ; + this->model = Matrix3D(this->n_class, + this->l_model, + this->n_category, + 0.) ; // wait until all threads are done working // and update the mode for(auto& future : futures) - { matrix3d_d model_part = future.get() ; + { Matrix3D model_part = future.get() ; for(size_t i=0; in_class; i++) { for(size_t j=0; jl_model; j++) { for(size_t k=0; kn_category; k++) - { this->model[i][j][k] += - model_part[i][j][k] ; + { this->model(i,j,k) += + model_part(i,j,k) ; } } } } // -------------------------- threads stop --------------------------- } // avoid 0's in the model to ensure that pmf_poisson() never // return 0 for(size_t i=0; in_class; i++) { for(size_t j=0; jl_model; j++) { for(size_t k=0; kn_category; k++) - { this->model[i][j][k] = - std::max(this->model[i][j][k], ReadLayer::p_min) ; + { this->model(i,j,k) = + std::max(this->model(i,j,k), ReadLayer::p_min) ; } } } } void ReadLayer::update_model_routine(size_t from, size_t to, - const matrix4d_d& posterior_prob, + const Matrix4D& posterior_prob, const vector_d& posterior_prob_colsum, - std::promise& promise) const + std::promise>& promise) const { // dimension checks this->check_posterior_prob_dim(posterior_prob) ; this->check_posterior_prob_colsum_dim(posterior_prob_colsum) ; // partial model - matrix3d_d model = matrix3d_d(this->n_class, - matrix2d_d(this->l_model, - vector_d(this->n_category, 0.))) ; + Matrix3D model(this->n_class, + this->l_model, + this->n_category, + 0.) 
; for(size_t n_class=0; n_class < this->n_class; n_class++) { for(size_t i=from; in_shift; n_shift++) { // --------------- forward --------------- int from_dat_fw = n_shift ; int to_dat_fw = from_dat_fw + this->l_model - 1 ; for(int j_dat_fw=from_dat_fw, j_ref_fw=0; j_dat_fw<=to_dat_fw; j_dat_fw++, j_ref_fw++) - { model[n_class][j_ref_fw][0] += - (posterior_prob[i][n_class][n_shift][flip_states::FORWARD] * - this->data[i][j_dat_fw]) / + { model(n_class,j_ref_fw,0) += + (posterior_prob(i,n_class,n_shift,flip_states::FORWARD) * + this->data(i,j_dat_fw)) / posterior_prob_colsum[n_class] ; } // --------------- reverse --------------- if(this->flip) { int from_dat_rev = this->n_col - 1 - n_shift ; int to_dat_rev = from_dat_rev - (this->l_model - 1) ; for(int j_dat_rev=from_dat_rev, j_ref_fw=0; j_dat_rev >= to_dat_rev; j_dat_rev--, j_ref_fw++) - { model[n_class][j_ref_fw][0] += - (posterior_prob[i][n_class][n_shift][flip_states::REVERSE] * - this->data[i][j_dat_rev]) / + { model(n_class,j_ref_fw,0) += + (posterior_prob(i,n_class,n_shift,flip_states::REVERSE) * + this->data(i,j_dat_rev)) / posterior_prob_colsum[n_class] ; } } } } } promise.set_value(model) ; } void ReadLayer::compute_window_means(ThreadPool* threads) { // don't parallelize if(threads == nullptr) { std::promise promise ; std::future future = promise.get_future() ; this->compute_window_means_routine(0, this->n_row, promise) ; future.get() ; } // parallelize else { size_t n_threads = threads->getNThread() ; // compute the slices on which each thread will work std::vector> slices = ThreadPool::split_range(0, this->n_row, n_threads) ; // get promises and futures // the function run by the threads will simply fill the promise with // "true" to indicate that they are done std::vector> promises(n_threads) ; std::vector> futures(n_threads) ; for(size_t i=0; iaddJob(std::move( std::bind(&ReadLayer::compute_window_means_routine, this, slice.first, slice.second, std::ref(promises[i])))) ; } // wait until all threads 
are done working for(auto& future : futures) { future.get() ; } // -------------------------- threads stop --------------------------- } } void ReadLayer::compute_window_means_routine(size_t from, size_t to, std::promise& done) { double l_window = double(this->l_model) ; for(size_t i=from; in_shift; from++) { double sum = 0. ; // slice is [from,to) size_t to = from + this->l_model ; for(size_t j=from; jdata[i][j] ;} - this->window_means[i][from] = sum / l_window ; + { sum += this->data(i,j) ;} + this->window_means(i,from) = sum / l_window ; } } done.set_value(true) ; } void ReadLayer::check_posterior_prob_colsum_dim(const vector_d& posterior_prob_colsum) const { if(posterior_prob_colsum.size() != this->n_class) { char msg[4096] ; sprintf(msg, "Error! posterior_class_prob matrix size is not " "equal to model class number : %zu / %zu", posterior_prob_colsum.size(), this->n_class) ; throw std::invalid_argument(msg) ; } } diff --git a/src/Clustering/ReadLayer.hpp b/src/Clustering/ReadLayer.hpp index b0d7636..0be7a3c 100644 --- a/src/Clustering/ReadLayer.hpp +++ b/src/Clustering/ReadLayer.hpp @@ -1,227 +1,209 @@ #ifndef READLAYER_HPP #define READLAYER_HPP #include -#include +#include +#include +#include #include +typedef std::vector vector_d ; + class ReadLayer : public DataLayer { public: /*! * \brief Constructs an object with the * given data and an empty (0 values) * model. * \param data the data. * \param n_class the number of classes * of the model. * \param n_shift the number of shift * states of the model. * \param flip whether flipping is allowed. * \param threads a pointer to a thread pool to * parallelize the computations. If nullptr is given, * the computations are performed by the main thread. */ - ReadLayer(const matrix2d_i& data, + ReadLayer(const Matrix2D& data, size_t n_class, size_t n_shift, bool flip, ThreadPool* threads = nullptr) ; /*! * \brief Construct an object with the * given data and model. * \param data the data. * \param the model. 
* \param flip whether flipping is allowed. * \param threads a pointer to a thread pool to * parallelize the computations. If nullptr is given, * the computations are performed by the main thread. */ - ReadLayer(const matrix2d_i& data, - const matrix3d_d& model, + ReadLayer(const Matrix2D& data, + const Matrix3D& model, bool flip, ThreadPool* threads = nullptr) ; /*! * Destructor */ virtual ~ReadLayer() override ; - /*! - * \brief Initialises the references randomly. - * Generates the initial references by randomly - * assigning the data to the classes using a beta - * distribution. - */ - virtual void seed_model_randomly() override ; - - /*! - * \brief Sets the model values by - * sampling rows in the data and - * assigning them as initial model - * values. - */ - virtual void seed_model_sampling() override ; - - /*! - * \brief Sets the model values by - * using the first n_class rows in data. - */ - virtual void seed_model_toy() override ; - /*! * \brief Computes the log likelihood of the data * given the current model parameters. * During this process, a normalized version of the * models, having a sum of signal of 1 count in average, * is used (a copy of the models is normalized, meaning * that the original models can still be retrieved the * dedicated getter). * \param logliklihood a matrix to store the * results. It should have the following dimensions : * 1st : same as the data number of row * 2nd : same as the model number of classes * 3rd : same as the number of shifts * 4th : same as the number of flip states * \param loglikelihood_max a vector containing the * max value for each row of loglikelihood. * Its length should be equal to the data row number. * \param threads a pointer to a thread pool to * parallelize the computations. If nullptr is given, * the computations are performed by the main thread. * \throw std::invalid_argument if the dimensions are * incorrect. 
*/ - virtual void compute_loglikelihoods(matrix4d_d& loglikelihood, + virtual void compute_loglikelihoods(Matrix4D& loglikelihood, vector_d& loglikelihood_max, ThreadPool* threads=nullptr) const override ; /*! * \brief Updates the model given the posterior * probabilities (the probabilities of each row * in the data to be assigned to each class, * for each shift and flip state). * \param posterior_prob the data assignment probabilities to * the different classes. * \param threads a pointer to a thread pool to * parallelize the computations. If nullptr is given, * the computations are performed by the main thread. */ - virtual void update_model(const matrix4d_d& posterior_prob, + virtual void update_model(const Matrix4D& posterior_prob, ThreadPool* threads=nullptr) override ; /*! * \brief Updates the model given the posterior * probabilities (the probabilities of each row * in the data to be assigned to each class, * for each shift and flip state). * This method does the same as the virtual method it * overloads. The only difference is that, for run time * gain, it is given the sum over the columns of the * posterior_prob matrix which is computed by the virtual * method. * \param posterior_prob the data assignment probabilities to * the different classes. * \param posterior_prob_colsum the sum over the columns * (classes) of the posterior_prob matrix. * \param threads a pointer to a thread pool to * parallelize the computations. If nullptr is given, * the computations are performed by the main thread. */ - void update_model(const matrix4d_d& posterior_prob, + void update_model(const Matrix4D& posterior_prob, const vector_d& posterior_prob_colsum, ThreadPool* threads=nullptr) ; protected: /*! * \brief The routine that effectively performs the * loglikelihood computations. * \param from the index of the first row of the data * to considered. * \param to the index of the past last row of the data * to considered. * \param loglikelihood a matrix to store the * results. 
It should have the following dimensions : * 1st : same as the data number of row * 2nd : same as the model number of classes * 3rd : same as the number of shifts * 4th : same as the number of flip states * \param loglikelihood_max a vector containing the * max value for each row of log_likelihood. * Its length should be equal to the data row number. * \param done a promise to be filled when the routine * is done running. */ void compute_loglikelihoods_routine(size_t from, size_t to, - matrix4d_d& loglikelihood, + Matrix4D& loglikelihood, vector_d& loglikelihood_max, std::promise& done) const ; /*! * \brief The routine that effectively update the model. * \param from the index of the first row of the * posterior probabilities to considered. * \param to the index of the past last row of the * posterior probabilities to considered. * \param posterior_prob the data assignment probabilities * to the different classes. * \param * \param promise a promise containing the partial model * computed from the given data slice. If several routines * work together to update the model, the promise matrices * need to be summed up to get the final model. */ void update_model_routine(size_t from, size_t to, - const matrix4d_d& posterior_prob, + const Matrix4D& posterior_prob, const vector_d& posterior_prob_colsum, - std::promise& promise) const ; + std::promise>& promise) const ; /*! * \brief Computes the mean number of reads present in * each slice (of length l_model), in each row * of the data and store them in this->window_means. * \param threads a pointer to a thread pool to * parallelize the computations. If nullptr is given, * the computations are performed by the main thread. */ void compute_window_means(ThreadPool* threads) ; /*! * \brief The routine that effectively computes the * window means. * \param from the index of the first row of the * data to considered. * \param to the index of the past last row of the * data to considered. 
* \param done a promise to fill when the routine * is done running. */ void compute_window_means_routine(size_t from, size_t to, std::promise& done) ; /*! * \brief Checks that the argument has compatible * dimensions with the data and models. If this is * not the case, throw a std::invalid_argument with * a relevant message. * \param posterior_class_prob a vector containing the * class probabilities. * It should have a length equal to the number of * classes. * \throw std::invalid_argument if the dimensions are * incorrect. */ void check_posterior_prob_colsum_dim(const vector_d& posterior_prob_colsum) const ; /*! * \brief contains the data means, for * each window of size l_model. */ - matrix2d_d window_means ; + Matrix2D window_means ; } ; #endif // READLAYER_HPP diff --git a/src/Clustering/ReadModelComputer.cpp b/src/Clustering/ReadModelComputer.cpp index dbfbd5f..fae46a0 100644 --- a/src/Clustering/ReadModelComputer.cpp +++ b/src/Clustering/ReadModelComputer.cpp @@ -1,43 +1,45 @@ #include +#include +#include #include #include -ReadModelComputer::ReadModelComputer(const matrix2d_i& data, - const matrix4d_d& post_prob, +#include + +ReadModelComputer::ReadModelComputer(const Matrix2D& data, + const Matrix4D& post_prob, size_t n_threads) : ModelComputer(), threads(nullptr) -{ - // parameters - size_t n_class = post_prob[0].size() ; - size_t n_shift = post_prob[0][0].size() ; - size_t n_flip = post_prob[0][0][0].size() ; +{ // parameters + size_t n_class = post_prob.get_dim()[1] ; + size_t n_shift = post_prob.get_dim()[2] ; + size_t n_flip = post_prob.get_dim()[3] ; bool flip = n_flip == 2 ; // the threads if(n_threads) { this->threads = new ThreadPool(n_threads) ; } // the data and the model this->data_layer = new ReadLayer(data, n_class, n_shift, flip) ; - this->data_layer->update_model(post_prob, this->threads) ; } ReadModelComputer::~ReadModelComputer() { // threads if(this->threads != nullptr) { this->threads->join() ; delete this->threads ; this->threads = 
nullptr ; } // data and model if(this->data_layer != nullptr) { delete this->data_layer ; this->data_layer = nullptr ; } } diff --git a/src/Clustering/ReadModelComputer.hpp b/src/Clustering/ReadModelComputer.hpp index 0794ea1..6341fa9 100644 --- a/src/Clustering/ReadModelComputer.hpp +++ b/src/Clustering/ReadModelComputer.hpp @@ -1,41 +1,42 @@ #ifndef READMODELCOMPUTER_HPP #define READMODELCOMPUTER_HPP #include -#include +#include +#include #include class ReadModelComputer : public ModelComputer { public: /*! * \brief Constructs an object to retrieve * the read model given the data and their * classification results. * \param data the data. * \param post_prob the data class assignment * probabilities. * \param n_threads the number of parallel threads * to run the computations. 0 means no parallel * computing, everything is run on the main thread. */ - ReadModelComputer(const matrix2d_i& data, - const matrix4d_d& post_prob, + ReadModelComputer(const Matrix2D& data, + const Matrix4D& post_prob, size_t n_threads) ; /*! * \brief Destructor. */ virtual ~ReadModelComputer() override ; protected: /*! * \brief the threads. 
*/ ThreadPool* threads ; } ; #endif // READMODELCOMPUTER_HPP diff --git a/src/Clustering/SequenceLayer.cpp b/src/Clustering/SequenceLayer.cpp index 0e1bc21..e923e79 100644 --- a/src/Clustering/SequenceLayer.cpp +++ b/src/Clustering/SequenceLayer.cpp @@ -1,587 +1,415 @@ #include #include // std::invalid_argument #include // numeric_limits #include // log(), pow() #include #include // std::max_element() #include // beta_pmf() #include // rand_real_uniform(), rand_int_uniform() -#include +#include +#include +#include #include -double SequenceLayer::score_subseq(const vector_i& seq, +double SequenceLayer::score_subseq(const Matrix2D& seq, + size_t row, size_t start, - const matrix2d_d& model_log) + const Matrix2D& model_log) { - if(start > seq.size() - model_log.size()) + if(start > seq.get_ncol() - model_log.get_nrow()) { char msg[4096] ; sprintf(msg, "Error! given start (%zu) is too high. Max value is %zu", - start, seq.size() - model_log.size()) ; + start, seq.get_ncol() - model_log.get_nrow()) ; throw std::invalid_argument(msg) ; } - else if(model_log.size() > seq.size()) + else if(model_log.get_nrow() > seq.get_ncol()) { char msg[4096] ; sprintf(msg, "Error! given model is longer than sequences (%zu / %zu)", - model_log.size(), seq.size()) ; + model_log.get_nrow(), seq.get_ncol()) ; throw std::invalid_argument(msg) ; } - else if(model_log[0].size() != 4) + else if(model_log.get_ncol() != 4) { char msg[4096] ; sprintf(msg, "Error! 
given model 2nd dimension is not 4 (%zu)", - model_log[0].size()) ; + model_log.get_ncol()) ; throw std::invalid_argument(msg) ; } size_t from = start ; - size_t to = from + model_log.size() ; // will score [from, to) + size_t to = from + model_log.get_nrow() ; // will score [from, to) - // std::cerr << "scoring subseq : " ; int n_code = dna::char_to_int('N') ; double ll = 0 ; for(size_t i=from, j=0; i get max score if(base == n_code) - { ll += *(std::max_element(std::begin(model_log[j]), - std::end(model_log[j]))) ; + { std::vector row = model_log.get_row(j) ; + ll += *(std::max_element(std::begin(row), + std::end(row))) ; } // A,C,G,T -> get its score else - { ll += model_log[j][base] ; } - // std::cerr << dna::int_to_char(base) << "(" << exp(model_log[j][base]) << ")" ; + { ll += model_log(j,base) ; } } - // std::cerr << " " << ll << std::endl ; return ll ; } -SequenceLayer::SequenceLayer(const matrix2d_i& data, +SequenceLayer::SequenceLayer(const Matrix2D& data, size_t n_class, size_t n_shift, - bool flip) - : DataLayer(data, n_class, n_shift, flip) + bool flip, + bool last_class_cst) + : DataLayer(data, n_class, n_shift, flip), + last_class_cst(last_class_cst) { this->n_category = 4 ; // initialise the empty model - this->model = matrix3d_d(this->n_class, - matrix2d_d(this->l_model, - vector_d(this->n_category, 0))) ; + this->model = Matrix3D(this->n_class, + this->l_model, + this->n_category, + 0.) 
; } -SequenceLayer::SequenceLayer(const matrix2d_i& data, - const matrix3d_d& model, - bool flip) - :DataLayer(data, model,flip) +SequenceLayer::SequenceLayer(const Matrix2D& data, + const Matrix3D& model, + bool flip, + bool last_class_cst) + : DataLayer(data, model,flip), + last_class_cst(last_class_cst) {} SequenceLayer::~SequenceLayer() {} -void SequenceLayer::seed_model_randomly() -{ - // get random values from a beta distribution cannot be done using boost so - // i) generate random number [0,1] x - // ii) compute f(x) where f is beta distribution - - matrix2d_d prob(this->n_row, vector_d(this->n_class, 0.)) ; - double tot_sum = 0. ; - - // sample the prob - // beta distribution parameters - // double alpha = pow(this->n_row, -0.5) ; - // double beta = 1. ; - double alpha = 1 ; - double beta = this->n_row ; - for(size_t i=0; in_row; i++) - { double row_sum = 0. ; - for(size_t j=0; jn_class; j++) - { double x = rand_real_uniform(0., 1.0) ; - double p = std::max(SequenceLayer::p_min, beta_pmf(x, alpha, beta)) ; - prob[i][j] = p ; - tot_sum += p ; - row_sum += p ; - } - // normalize - for(size_t j=0; jn_class; j++) - { prob[i][j] /= row_sum ; } - } - - // compute the refererences - for(size_t i=0; in_row; i++) - { for(size_t j=0; jn_class; j++) - { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_model; j_ref++, j_dat++) - { size_t base = this->data[i][j_dat] ; - this->model[j][j_ref][base] += prob[i][j] ; - } - } - } - // normalize - for(size_t i=0; in_class; i++) - { for(size_t j=0; jl_model; j++) - { // sum - double colsum = 0. 
; - for(size_t k=0; kn_category; k++) - { colsum += this->model[i][j][k] ; } - // normalize - // avoid 0's in the model to ensure that pmf_poisson() never - // return 0 - for(size_t k=0; kn_category; k++) - { double p = this->model[i][j][k] / colsum ; - this->model[i][j][k] = - std::max(p, SequenceLayer::p_min) ; - } - } - } -} - -void SequenceLayer::seed_model_sampling() -{ - std::vector choosen(this->n_row, false) ; - - double minor_weight = 1. ; - double major_weight = 7. ; - - for(size_t i=0; in_class; ) - { size_t index = rand_int_uniform(size_t(0), size_t(this->n_row-1)) ; - // already choose - if(choosen[index]) - { ; } - // not yet choosen as reference - else - { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_model; j_ref++, j_dat++) - { size_t base = this->data[index][j_dat] ; - double colsum = 0. ; - for(size_t k=0; kn_category; k++) - { if(k == base) - { this->model[i][j_ref][k] = major_weight ; } - else - { this->model[i][j_ref][k] = minor_weight ; } - colsum += this->model[i][j_ref][k] ; - } - // normalize - for(size_t k=0; kn_category; k++) - { this->model[i][j_ref][k] /= colsum ; } - } - choosen[index] = true ; - i++ ; - } - } - - // NOTE - // no need to check for 0's in the model because it is guaranteed - // not to have any (minor and major_weights > 0) but if it - // changes, a check will be needed -} - -/* -void SequenceLayer::seed_model_toy() -{ - // sample data to initialise the references - std::vector choosen(this->n_row, false) ; - - double minor_weight = 1. ; - double major_weight = 7. ; - - for(size_t i=0; in_class; ) - { size_t index = i ; - // already choose - if(choosen[index]) - { ; } - // not yet choosen as reference - else - { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_model; j_ref++, j_dat++) - { size_t base = this->data[index][j_dat] ; - double colsum = 0. 
; - for(size_t k=0; kn_category; k++) - { if(k == base) - { this->model[i][j_ref][k] = major_weight ; } - else - { this->model[i][j_ref][k] = minor_weight ; } - colsum += this->model[i][j_ref][k] ; - } - // normalize - for(size_t k=0; kn_category; k++) - { this->model[i][j_ref][k] /= colsum ; } - } - choosen[index] = true ; - i++ ; - } - } - - // NOTE - // no need to check for 0's in the model because it is guaranteed - // not to have any (minor and major_weights > 0) but if it - // changes, a check will be needed -} -*/ - -void SequenceLayer::seed_model_toy() -{ - this->model[0][0][0] = 0.8 ; - this->model[0][0][1] = 0.1 ; - this->model[0][0][2] = 0.05 ; - this->model[0][0][3] = 0.05 ; - - this->model[0][1][0] = 0.1 ; - this->model[0][1][1] = 0.7 ; - this->model[0][1][2] = 0.1 ; - this->model[0][1][3] = 0.1 ; - - this->model[0][2][0] = 0.1 ; - this->model[0][2][1] = 0.1 ; - this->model[0][2][2] = 0.7 ; - this->model[0][2][3] = 0.1 ; - - this->model[0][3][0] = 0.1 ; - this->model[0][3][1] = 0.1 ; - this->model[0][3][2] = 0.1 ; - this->model[0][3][3] = 0.7 ; - - this->model[0][4][0] = 0.1 ; - this->model[0][4][1] = 0.1 ; - this->model[0][4][2] = 0.1 ; - this->model[0][4][3] = 0.7 ; - - this->model[0][5][0] = 0.1 ; - this->model[0][5][1] = 0.1 ; - this->model[0][5][2] = 0.7 ; - this->model[0][5][3] = 0.1 ; - - this->model[0][6][0] = 0.1 ; - this->model[0][6][1] = 0.7 ; - this->model[0][6][2] = 0.1 ; - this->model[0][6][3] = 0.1 ; - - this->model[0][7][0] = 0.7 ; - this->model[0][7][1] = 0.1 ; - this->model[0][7][2] = 0.1 ; - this->model[0][7][3] = 0.1 ; -} - -/* -void SequenceLayer::seed_model_toy() -{ - this->model[0][0][0] = 0.2340 ; - this->model[0][0][1] = 0.4307 ; - this->model[0][0][2] = 0.0952 ; - this->model[0][0][3] = 0.2401 ; - - this->model[0][1][0] = 0.1412 ; - this->model[0][1][1] = 0.2819 ; - this->model[0][1][2] = 0.4411 ; - this->model[0][1][3] = 0.1358 ; - - this->model[0][2][0] = 0.2963 ; - this->model[0][2][1] = 0.1578 ; - this->model[0][2][2] = 
0.3153 ; - this->model[0][2][3] = 0.2306 ; - - this->model[0][3][0] = 0.1475 ; - this->model[0][3][1] = 0.3947 ; - this->model[0][3][2] = 0.2290 ; - this->model[0][3][3] = 0.2287 ; - - this->model[0][4][0] = 0.1403 ; - this->model[0][4][1] = 0.1473 ; - this->model[0][4][2] = 0.4608 ; - this->model[0][4][3] = 0.2516 ; - - this->model[0][5][0] = 0.2210 ; - this->model[0][5][1] = 0.2487 ; - this->model[0][5][2] = 0.2073 ; - this->model[0][5][3] = 0.3230 ; - - this->model[0][6][0] = 0.3288 ; - this->model[0][6][1] = 0.1526 ; - this->model[0][6][2] = 0.1529 ; - this->model[0][6][3] = 0.3656 ; - - this->model[0][7][0] = 0.1295 ; - this->model[0][7][1] = 0.3987 ; - this->model[0][7][2] = 0.2997 ; - this->model[0][7][3] = 0.1721 ; -} -*/ - -void SequenceLayer::compute_loglikelihoods(matrix4d_d& loglikelihood, +void SequenceLayer::compute_loglikelihoods(Matrix4D& loglikelihood, vector_d& loglikelihood_max, ThreadPool* threads) const { // dimension checks this->check_loglikelihood_dim(loglikelihood) ; this->check_loglikelihood_max_dim(loglikelihood_max) ; // compute the log prob model and the log prob reverse-complement model - matrix3d_d model_log(this->n_class, - matrix2d_d(this->l_model, - vector_d(this->n_category, 0.))) ; - matrix3d_d model_log_rev = model_log ; + std::vector> model_log(this->n_class, + Matrix2D(this->l_model, + this->n_category, + 0.)) ; + std::vector> model_log_rev = model_log ; + /* + Matrix3D model_log(this->n_class, + this->l_model, + this->n_category, + 0.) 
; + Matrix3D model_log_rev = model_log ; + */ for(size_t i=0; in_class; i++) { for(size_t j=0; jl_model; j++) { for(size_t k=0; kn_category; k++) { // forward - model_log[i][j][k] = log(this->model[i][j][k]) ; + model_log[i](j,k) = log(this->model(i,j,k)) ; // reverse - model_log_rev[i][this->l_model-j-1][this->n_category-k-1] - = log(this->model[i][j][k]) ; + model_log_rev[i](this->l_model-j-1,this->n_category-k-1) + = log(this->model(i,j,k)) ; } } } // don't parallelize if(threads == nullptr) { std::promise promise ; std::future future = promise.get_future() ; this->compute_loglikelihoods_routine(0, this->n_row, loglikelihood, loglikelihood_max, model_log, model_log_rev, promise) ; future.get() ; } // parallelize else { size_t n_threads = threads->getNThread() ; // compute the slices on which each thread will work std::vector> slices = ThreadPool::split_range(0, this->n_row, n_threads) ; // get promises and futures // the function run by the threads will simply fill the promise with // "true" to indicate that they are done std::vector> promises(n_threads) ; std::vector> futures(n_threads) ; for(size_t i=0; iaddJob(std::move( std::bind(&SequenceLayer::compute_loglikelihoods_routine, this, slice.first, slice.second, std::ref(loglikelihood), std::ref(loglikelihood_max), std::ref(model_log), std::ref(model_log_rev), std::ref(promises[i])))) ; } // wait until all threads are done working for(auto& future : futures) { future.get() ; } // -------------------------- threads stop --------------------------- } } void SequenceLayer::compute_loglikelihoods_routine(size_t from, size_t to, - matrix4d_d& loglikelihood, + Matrix4D& loglikelihood, vector_d& loglikelihood_max, - const matrix3d_d& model_log, - const matrix3d_d& model_log_rev, + const std::vector>& model_log, + const std::vector>& model_log_rev, std::promise& done) const { // compute log likelihood for(size_t i=from; i::lowest() ; for(size_t j=0; jn_class; j++) { - // std::cerr << model[j] << std::endl << std::endl 
; - for(size_t s=0; sn_shift; s++) { // forward strand - { double ll_fw = score_subseq(this->data[i], s, model_log[j]) ; - loglikelihood[i][j][s][flip_states::FORWARD] = ll_fw ; + { double ll_fw = score_subseq(this->data, i, s, model_log[j]) ; + loglikelihood(i,j,s,flip_states::FORWARD) = ll_fw ; // keep track of max per row if(ll_fw > loglikelihood_max[i]) { loglikelihood_max[i] = ll_fw ; } } // reverse if(this->flip) - { double ll_rev = score_subseq(this->data[i], s, model_log_rev[j]) ; - loglikelihood[i][j][s][flip_states::REVERSE] = ll_rev ; + { double ll_rev = score_subseq(this->data, i, s, model_log_rev[j]) ; + loglikelihood(i,j,s,flip_states::REVERSE) = ll_rev ; // keep track of max per row if(ll_rev > loglikelihood_max[i]) { loglikelihood_max[i] = ll_rev ; } } } } } done.set_value(true) ; } -void SequenceLayer::update_model(const matrix4d_d& posterior_prob, +void SequenceLayer::update_model(const Matrix4D& posterior_prob, ThreadPool* threads) { // don't parallelize if(threads == nullptr) - { std::promise promise ; - std::future future = promise.get_future() ; + { std::promise> promise ; + std::future> future = promise.get_future() ; this->update_model_routine(0, this->n_row, posterior_prob, promise) ; - this->model = future.get() ; + // this->model = future.get() ; + auto model = future.get() ; + size_t n_class_to_update = this->n_class - this->last_class_cst ; + for(size_t i=0; il_model; j++) + { for(size_t k=0; kn_category; k++) + { this->model(i,j,k) = model(i,j,k) ; } + } + } } // parallelize else { size_t n_threads = threads->getNThread() ; // compute the slices on which each thread will work std::vector> slices = ThreadPool::split_range(0, this->n_row, n_threads) ; // get promises and futures // the function run by the threads will simply fill the promise with // "true" to indicate that they are done - std::vector> promises(n_threads) ; - std::vector> futures(n_threads) ; + std::vector>> promises(n_threads) ; + std::vector>> futures(n_threads) ; 
for(size_t i=0; iaddJob(std::move( std::bind(&SequenceLayer::update_model_routine, this, slice.first, slice.second, std::ref(posterior_prob), std::ref(promises[i])))) ; } // reinitialise the model - this->model = matrix3d_d(this->n_class, - matrix2d_d(this->l_model, - vector_d(this->n_category, 0))) ; + /* + this->model = Matrix3D(this->n_class, + this->l_model, + this->n_category, + 0.) ; + */ + size_t n_class_to_update = this->n_class - this->last_class_cst ; + for(size_t i=0; il_model; j++) + { for(size_t k=0; kn_category; k++) + { this->model(i,j,k) = 0. ; } + } + } // wait until all threads are done working // and update the model for(auto& future : futures) - { matrix3d_d model_part = future.get() ; - for(size_t i=0; in_class; i++) + { Matrix3D model_part = future.get() ; + for(size_t i=0; in_class; i++) { for(size_t j=0; jl_model; j++) { for(size_t k=0; kn_category; k++) - { this->model[i][j][k] += model_part[i][j][k] ; } + { this->model(i,j,k) += model_part(i,j,k) ; } } } } // -------------------------- threads stop --------------------------- } // make sure to have no 0 values for(size_t i=0; in_class; i++) { for(size_t j=0; jl_model; j++) { for(size_t k=0; kn_category; k++) - { this->model[i][j][k] = - std::max(this->model[i][j][k], SequenceLayer::p_min) ; + { this->model(i,j,k) = + std::max(this->model(i,j,k), SequenceLayer::p_min) ; } } } // normalize to get probs for(size_t i=0; in_class; i++) { for(size_t j=0; jl_model; j++) { double sum = 0. 
; for(size_t k=0; kn_category; k++) - { sum += this->model[i][j][k] ; } + { sum += this->model(i,j,k) ; } for(size_t k=0; kn_category; k++) - { double p = this->model[i][j][k] / sum ; - this->model[i][j][k] = p ; + { double p = this->model(i,j,k) / sum ; + this->model(i,j,k) = p ; /* - this->model[i][j][k] = + this->model(i,j,k) = std::max(p, SequenceLayer::p_min) ; */ } } } } void SequenceLayer::update_model_routine(size_t from, size_t to, - const matrix4d_d& posterior_prob, - std::promise& promise) const -{ - // dimension checks + const Matrix4D& posterior_prob, + std::promise>& promise) const +{ // dimension checks this->check_posterior_prob_dim(posterior_prob) ; - matrix3d_d model = matrix3d_d(this->n_class, - matrix2d_d(this->l_model, - vector_d(this->n_category, 0))) ; + Matrix3D model(this->n_class, + this->l_model, + this->n_category, + 0.) ; // the int code of A, C, G, T, N static int a_code = dna::char_to_int('A') ; static int c_code = dna::char_to_int('C') ; static int g_code = dna::char_to_int('G') ; static int t_code = dna::char_to_int('T') ; static int n_code = dna::char_to_int('N') ; // the int code of the reverse complement of A, C, G, T static int a_code_r = dna::char_to_int('A', true) ; static int c_code_r = dna::char_to_int('C', true) ; static int g_code_r = dna::char_to_int('G', true) ; static int t_code_r = dna::char_to_int('T', true) ; - for(size_t k=0; k < this->n_class; k++) + size_t n_class_to_update = this->n_class - this->last_class_cst ; + + for(size_t k=0; k < n_class_to_update; k++) + // for(size_t k=0; k < this->n_class; k++) { for(size_t s=0; sn_shift; s++) { for(size_t j=0; jl_model; j++) - { - // base prob on fw and rv strand - vector_d base_prob(this->n_category, 0.) ; - vector_d base_prob_rev(this->n_category,0.) ; - + { // base prob on fw and rv strand + vector_d base_prob_fw(this->n_category, 0.) ; + vector_d base_prob_rv(this->n_category, 0.) 
; for(size_t i=from; idata[i][s+j] ; + { int base = this->data(i,s+j) ; int base_rev = this->n_category - base - 1 ; // N if(base == n_code) { // --------------- forward --------------- - { base_prob[a_code] += - posterior_prob[i][k][s][SequenceLayer::FORWARD] ; - base_prob[c_code] += - posterior_prob[i][k][s][SequenceLayer::FORWARD] ; - base_prob[g_code] += - posterior_prob[i][k][s][SequenceLayer::FORWARD] ; - base_prob[t_code] += - posterior_prob[i][k][s][SequenceLayer::FORWARD] ; + { base_prob_fw[a_code] += + posterior_prob(i,k,s,SequenceLayer::FORWARD) ; + base_prob_fw[c_code] += + posterior_prob(i,k,s,SequenceLayer::FORWARD) ; + base_prob_fw[g_code] += + posterior_prob(i,k,s,SequenceLayer::FORWARD) ; + base_prob_fw[t_code] += + posterior_prob(i,k,s,SequenceLayer::FORWARD) ; } // --------------- reverse --------------- if(this->flip) - { base_prob_rev[a_code_r] += - posterior_prob[i][k][s][SequenceLayer::REVERSE] ; - base_prob_rev[c_code_r] += - posterior_prob[i][k][s][SequenceLayer::REVERSE] ; - base_prob_rev[g_code_r] += - posterior_prob[i][k][s][SequenceLayer::REVERSE] ; - base_prob_rev[t_code_r] += - posterior_prob[i][k][s][SequenceLayer::REVERSE] ; + { base_prob_rv[a_code_r] += + posterior_prob(i,k,s,SequenceLayer::REVERSE) ; + base_prob_rv[c_code_r] += + posterior_prob(i,k,s,SequenceLayer::REVERSE) ; + base_prob_rv[g_code_r] += + posterior_prob(i,k,s,SequenceLayer::REVERSE) ; + base_prob_rv[t_code_r] += + posterior_prob(i,k,s,SequenceLayer::REVERSE) ; } } // A, C, G, T else - { // --------------- forward --------------- - { base_prob[base] += - posterior_prob[i][k][s][SequenceLayer::FORWARD] ; + { { base_prob_fw[base] += + posterior_prob(i,k,s,SequenceLayer::FORWARD) ; } // --------------- reverse --------------- if(this->flip) - { base_prob_rev[base_rev] += - posterior_prob[i][k][s][SequenceLayer::REVERSE] ; + { base_prob_rv[base_rev] += + posterior_prob(i,k,s,SequenceLayer::REVERSE) ; } } } // update this position of the model - for(size_t 
i=0,i_rev=base_prob.size()-1; iflip) - { model[k][this->l_model-j-1][i] += base_prob_rev[i] ; } + { model(k,this->l_model-j-1,i) += base_prob_rv[i] ; } } } } } promise.set_value(model) ; } + +void SequenceLayer::set_class(size_t i, const Matrix2D& motif) +{ // check dimensions + if(motif.get_nrow() != this->n_category) + { char msg[4096] ; + sprintf(msg, "Error! the given class model is incompatible " + "with the SequenceLayer (%zu rows instead of %zu)", + motif.get_nrow(), this->n_category) ; + throw std::invalid_argument(msg) ; + } + else if(motif.get_ncol() != this->l_model) + { char msg[4096] ; + sprintf(msg, "Error! the given class model is incompatible " + "with the SequenceLayer (%zu columns instead of %zu)", + motif.get_ncol(), this->l_model) ; + throw std::invalid_argument(msg) ; + } + + for(size_t j=0; jmodel(i,j,k) = motif(k,j) ; } + } +} diff --git a/src/Clustering/SequenceLayer.hpp b/src/Clustering/SequenceLayer.hpp index 844c3e5..a31d2bb 100644 --- a/src/Clustering/SequenceLayer.hpp +++ b/src/Clustering/SequenceLayer.hpp @@ -1,176 +1,203 @@ #ifndef SEQUENCELAYER_HPP #define SEQUENCELAYER_HPP #include #include #include #include // std::promise, std::future -#include +#include +#include +#include #include +typedef std::vector vector_d ; + class SequenceLayer : public DataLayer { public: /*! * \brief Computes the log-likelihood of the sub- - * sequence starting at the offset in the given - * sequence. The subsequence length is determined - * by the model lenght. + * sequence - stored in a given row - and starting + * at the offset in the given sequence matrix. + * The subsequence length is determined by the model + * lenght. * \param seq the sequences in integer format. - * \param start the index at which the sub-sequence + * \param row the row containing the sequence of + * interest. + * \param col the index at which the sub-sequence * is starting (1st index inside the subsequence * of interest). 
* \param model_log a model containing the log * probability model. * \return the log-likelihood of the sub-sequence * given the model. * \throw std::invalid_argument if 1) the offset is * invalid, 2) the sequence and the model have * incompatible dimensions or 3) the model 2n dimension * is not 4 (A,C,G,T). */ - static double score_subseq(const vector_i& seq, - size_t start, - const matrix2d_d& model_log) ; + static double score_subseq(const Matrix2D& seq, + size_t row, + size_t col, + const Matrix2D& model_log) ; public: /*! * \brief Constructs an object with the * given data and an empty (0 values) * model. * \param data the data. * \param n_class the number of classes * of the model. * \param n_shift the number of shift * states of the model. * \param flip whether flipping is allowed. + * \param last_class_cst indicates that the + * last class of the model is constant + * and will never be updated by calls to + * update_model(). */ - SequenceLayer(const matrix2d_i& data, + SequenceLayer(const Matrix2D& data, size_t n_class, size_t n_shift, - bool flip) ; + bool flip, + bool last_class_cst) ; /*! * \brief Construct an object with the * given data and model. + * The shifting freedom is set to (data number + * of columns) - (the model 2nd dimension) + * + 1. * \param data the data. The sequences * should be stored as integer values : * A:0, C:1, G:2, T:3, else:5. - * \param the model. + * \param model the model with the following + * dimensions : + * dim1 the number of classes + * dim2 the model length + * dim3 4 (A,C,G,T) * \param flip whether flipping is allowed. + * \param last_class_cst indicates that the + * last class of the model is constant + * and will never be updated by calls to + * update_model(). */ - SequenceLayer(const matrix2d_i& data, - const matrix3d_d& model, - bool flip) ; + SequenceLayer(const Matrix2D& data, + const Matrix3D& model, + bool flip, + bool last_class_cst) ; /*! * Destructor */ virtual ~SequenceLayer() override ; - /*! 
- * \brief Sets the model values randomly. - */ - virtual void seed_model_randomly() ; - - /*! - * \brief Sets the model values by - * sampling rows in the data and - * assigning them as initial model - * values. - */ - virtual void seed_model_sampling() ; - - /*! - * \brief Sets the model values by - * using the first n_class rows in data. - */ - virtual void seed_model_toy() ; - - /*! * \brief Computes the log likelihood of the data * given the current model parameters. * \param logliklihood a matrix to store the * results. It should have the following dimensions : * 1st : same as the data number of row * 2nd : same as the model number of classes * 3rd : same as the number of shifts * 4th : same as the number of flip states * \param loglikelihood_max a vector containing the * max value for each row of loglikelihood. * Its length should be equal to the data row number. * \throw std::invalid_argument if the dimensions are * incorrect. */ - virtual void compute_loglikelihoods(matrix4d_d& loglikelihood, + virtual void compute_loglikelihoods(Matrix4D& loglikelihood, vector_d& loglikelihood_max, ThreadPool* threads=nullptr) const override ; /*! * \brief Updates the model given the posterior * probabilities (the probabilities of each row * in the data to be assigned to each class, * for each shift and flip state). * \param posterior_prob the data assignment probabilities to * the different classes. */ - virtual void update_model(const matrix4d_d& posterior_prob, + virtual void update_model(const Matrix4D& posterior_prob, ThreadPool* threads=nullptr) override ; + /*! + * \brief Modify the values of th given class + * with the given parameters. + * The given motif should have the same length + * as the current model classes. + * \param i the index of the class to modify, 0-based. + * \param motif the new parameters values. + * Its dimensions should be : + * 1st : 4 for A,C,G,T + * 2nd : the model length. 
+ * \throw std::invalid_argument if the dimensions are not + * compatible with the current model classes. + */ + void set_class(size_t i, const Matrix2D& motif) ; + protected: /*! * \brief The routine that effectively performs the * loglikelihood computations. * \param from the index of the first row of the data * to considered. * \param to the index of the past last row of the data * to considered. * \param loglikelihood a matrix to store the * results. It should have the following dimensions : * 1st : same as the data number of row * 2nd : same as the model number of classes * 3rd : same as the number of shifts * 4th : same as the number of flip states * \param loglikelihood_max a vector containing the * max value for each row of log_likelihood. * Its length should be equal to the data row number. - * \param model_log a matrix containing the log value - * of the model. - * \param model_log_rev a matrix containing the log values - * of the reverse strand model (the 1st position in the model - * becomes the last in the reverse model and probabilities are - * swapped A<->T and C<->G). + * \param model_log a vector containing the matrices with + * the log values of the model for each class. + * \param model_log_rev a vector containing the matrices with + * the log values of the reverse strand model for each class + * (the 1st position in the model becomes the last in the + * reverse model and probabilities are swapped A<->T and C<->G). * \param done a promise to be filled when the routine * is done running. */ void compute_loglikelihoods_routine(size_t from, size_t to, - matrix4d_d& loglikelihood, + Matrix4D& loglikelihood, vector_d& loglikelihood_max, - const matrix3d_d& model_log, - const matrix3d_d& model_log_rev, + const std::vector>& model_log, + const std::vector>& model_log_rev, std::promise& done) const ; /*! * \brief The routine that effectively update the model. * \param from the index of the first row of the * posterior probabilities to considered. 
* \param to the index of the past last row of the * posterior probabilities to considered. * \param posterior_prob the data assignment probabilities * to the different classes. * \param * \param done a promise containing the partial model * computed from the given data slice. If several routines * work together at updating the model, they need to be * summed and normalized (by the column sum) to get the * final model. */ void update_model_routine(size_t from, size_t to, - const matrix4d_d& posterior_prob, - std::promise& done) const ; + const Matrix4D& posterior_prob, + std::promise>& done) const ; + + /*! + * \brief A flag indicating that the last class of the model + * is constant and should not be updated when calling + * update_model(). + */ + bool last_class_cst ; } ; #endif // SEQUENCELAYER_HPP diff --git a/src/Clustering/SequenceModelComputer.cpp b/src/Clustering/SequenceModelComputer.cpp index b267a4a..0ba262f 100644 --- a/src/Clustering/SequenceModelComputer.cpp +++ b/src/Clustering/SequenceModelComputer.cpp @@ -1,42 +1,45 @@ #include +#include +#include #include -SequenceModelComputer::SequenceModelComputer(const matrix2d_i& data, - const matrix4d_d& post_prob, +SequenceModelComputer::SequenceModelComputer(const Matrix2D& data, + const Matrix4D& post_prob, size_t n_threads) : ModelComputer(), threads(nullptr) { // parameters - size_t n_class = post_prob[0].size() ; - size_t n_shift = post_prob[0][0].size() ; - size_t n_flip = post_prob[0][0][0].size() ; + size_t n_class = post_prob.get_dim()[1] ; + size_t n_shift = post_prob.get_dim()[2] ; + size_t n_flip = post_prob.get_dim()[3] ; bool flip = n_flip == 2 ; // the threads if(n_threads) { this->threads = new ThreadPool(n_threads) ; } // the data and the model this->data_layer = new SequenceLayer(data, n_class, n_shift, - flip) ; + flip, + false) ; this->data_layer->update_model(post_prob, this->threads) ; } SequenceModelComputer::~SequenceModelComputer() { // threads if(this->threads != nullptr) { 
this->threads->join() ; delete this->threads ; this->threads = nullptr ; } // data and model if(this->data_layer != nullptr) { delete this->data_layer ; this->data_layer = nullptr ; } } diff --git a/src/Clustering/SequenceModelComputer.hpp b/src/Clustering/SequenceModelComputer.hpp index 9c69b97..b1b6842 100644 --- a/src/Clustering/SequenceModelComputer.hpp +++ b/src/Clustering/SequenceModelComputer.hpp @@ -1,41 +1,42 @@ #ifndef SEQUENCEMODELCOMPUTER_HPP #define SEQUENCEMODELCOMPUTER_HPP #include -#include +#include +#include #include class SequenceModelComputer : public ModelComputer { public: /*! * \brief Constructs an object to retrieve * the sequence model given the data and their * classification results. * \param data the data. * \param post_prob the data class assignment * probabilities. * \param n_threads the number of parallel threads * to run the computations. 0 means no parallel * computing, everything is run on the main thread. */ - SequenceModelComputer(const matrix2d_i& data, - const matrix4d_d& post_prob, + SequenceModelComputer(const Matrix2D& data, + const Matrix4D& post_prob, size_t n_threads) ; /*! * \brief Destructor. */ virtual ~SequenceModelComputer() override ; protected: /*! * \brief the threads. 
*/ ThreadPool* threads ; } ; #endif // SEQUENCEMODELCOMPUTER_HPP diff --git a/src/GenomicTools/CorrelationMatrixCreator.cpp b/src/GenomicTools/CorrelationMatrixCreator.cpp index 44162a6..0197664 100644 --- a/src/GenomicTools/CorrelationMatrixCreator.cpp +++ b/src/GenomicTools/CorrelationMatrixCreator.cpp @@ -1,375 +1,374 @@ #include #include #include // std::runtime_error #include // BamFileIn #include // BedFileIn #include -#include +#include /* template std::ostream& operator << (std::ostream& stream, const std::list& l) { for(const auto& p : l) { stream << p << " " ; } return stream ; } template std::ostream& operator << (std::ostream& stream, const std::vector& v) { for(const auto& p : v) { stream << p << " " ; } return stream ; } template std::ostream& operator << (std::ostream& stream, const std::pair& p) { stream << "[" << p.first << " " << p.second << "] " ; return stream ; } template std::ostream& operator << (std::ostream& stream, const std::unordered_map& m) { for(const auto& p : m) { stream << p << " " << std::endl; } return stream ; } */ /* A lambda to sort GenomeRegion by ascending starting coordinate */ auto sortByStartPos = [](const GenomeRegion& r1, const GenomeRegion& r2) -> bool { return r1 < r2 ; } ; CorrelationMatrixCreator::CorrelationMatrixCreator(const std::string& bed_file_path, const std::string& bam_file_path, const std::string& bai_file_path, int from, int to, int bin_size, CorrelationMatrixCreator::methods method) : ReadMatrixCreator(bed_file_path, bam_file_path, bai_file_path, from, to, bin_size, method), target_list_fw(), target_list_rv() { seqan::BedRecord bed_line ; // compute coordinates relative to each region this->compute_relative_bin_coord() ; size_t n_col = this->relative_bin_coord.size() ; // compute number of regions and get valid chromosomes names this->open_bed_file() ; this->open_bam_file() ; seqan::BamHeader header ; seqan::readHeader(header, bam_file) ; size_t n_row = 0 ; while(not seqan::atEnd(this->bed_file)) { 
seqan::readRecord(bed_line, this->bed_file) ; std::string chrom_name = seqan::toCString(bed_line.ref) ; // new chromosome if(this->chrom_map_names.find(chrom_name) == this->chrom_map_names.end()) { int chrom_idx = -1 ; seqan::getIdByName(chrom_idx, seqan::contigNamesCache(seqan::context(this->bam_file)), chrom_name) ; this->chrom_map_names[chrom_name] = chrom_idx ; } n_row++ ; } this->close_bed_file() ; this->close_bam_file() ; // create the count matrix - this->matrix_counts = matrix2d_i(n_row, - vector_i(n_col, 0)) ; + this->matrix_counts = Matrix2D(n_row, n_col, 0.) ; // create the region matrix this->matrix_bins = std::vector> (n_row,std::vector(n_col)) ; this->open_bed_file() ; this->open_bam_file() ; size_t i = 0 ; while(not seqan::atEnd(this->bed_file)) { seqan::readRecord(bed_line, this->bed_file) ; // find the region limits std::string region_chr = seqan::toCString(bed_line.ref) ; // int region_len = bed_line.endPos - bed_line.beginPos ; // int region_mid = bed_line.beginPos + (region_len / 2) ; int region_mid = CorrelationMatrixCreator::get_center_pos(bed_line) ; // compute the absolute bins coordinates for this region // and create the bins in this region for(size_t j=0; jrelative_bin_coord[j] ; this->matrix_bins[i][j] = GenomeRegion(region_chr, this->chrom_map_names[region_chr], region_mid + relative_coord.first, region_mid + relative_coord.second) ; } i++ ; } this->close_bed_file() ; this->close_bam_file() ; } CorrelationMatrixCreator::~CorrelationMatrixCreator() { this->close_bam_file() ; // bed file is closed in ~MatrixCreator() } -matrix2d_i CorrelationMatrixCreator::create_matrix() +Matrix2D CorrelationMatrixCreator::create_matrix() { this->open_bam_file() ; this->open_bai_file() ; // read BAM header seqan::BamHeader bam_header ; seqan::readHeader(bam_header, this->bam_file) ; - for(size_t i=0; imatrix_counts.size(); i++) + for(size_t i=0; imatrix_counts.get_nrow(); i++) { const auto& row = this->matrix_bins[i] ; GenomeRegion 
region(row.front().chromosome, row.front().chromosome_idx, row.front().start, row.back().end) ; bool jump = this->jump_upstream(region, 600) ; if(not jump) { continue ; } // read all relevant targets this->to_downstream_target(region) ; // update count matrix row this->update_count_matrix(i) ; // clean buffers this->clear_target_lists() ; } this->close_bam_file() ; return this->matrix_counts ; } bool CorrelationMatrixCreator::jump_upstream(const GenomeRegion& region, int margin) { bool has_alignment = false ; int rID = -10 ; if(this->chrom_map_names.find(region.chromosome) != this->chrom_map_names.end()) { rID = this->chrom_map_names[region.chromosome] ; } else { char msg[4096] ; sprintf(msg, "Error! chromosome %s is not linked with a valid ID in BAM file", region.chromosome.c_str()) ; std::cerr << msg << std::endl ; return false ; } int start = std::max(0, region.start - margin) ; int end = start + 1 ; bool jump = seqan::jumpToRegion(this->bam_file, has_alignment, rID, start, end, this->bai_file) ; return jump ; } void CorrelationMatrixCreator::to_downstream_target(const GenomeRegion& region) { if(this->method == CorrelationMatrixCreator::methods::READ or this->method == CorrelationMatrixCreator::methods::READ_ATAC) { this->to_downstream_read(region) ; } else { this->to_downstream_fragment(region) ; } } void CorrelationMatrixCreator::to_downstream_read(const GenomeRegion& region) { bool done = false ; seqan::BamAlignmentRecord record ; while(not seqan::atEnd(this->bam_file) and not done) { // QC check and transform record seqan::readRecord(record, this->bam_file) ; if(not CorrelationMatrixCreator::is_good_read(record) or not this->is_valid_chromosome(record)) { continue ; } GenomeRegion target ; try { if(this->method == CorrelationMatrixCreator::methods::READ) { target = GenomeRegion::constructRead(record, this->bam_file) ; } else { target = GenomeRegion::constructReadATAC(record, this->bam_file) ; } } catch(std::invalid_argument& e) { // connect to cerr to write 
in SAM seqan::BamFileOut samFileOut(seqan::context(this->bam_file), std::cerr, seqan::Sam()) ; std::cerr << "std::invalid_argument caught! could not use " "this record as read: " << std::endl ; writeRecord(samFileOut, record) ; std::cerr << "message was : " << e.what() << std::endl << std::endl ; continue ; } // upstream -> continue if(target < region) { continue ; } // overlap -> store else if(target | region) { if(not seqan::hasFlagRC(record)) { this->target_list_fw.push_back(target) ; } else { this->target_list_rv.push_back(target) ; } } // downstream -> stop else { done = true ; } } } void CorrelationMatrixCreator::to_downstream_fragment(const GenomeRegion& region) { bool done = false ; seqan::BamAlignmentRecord record ; while(not seqan::atEnd(this->bam_file) and not done) { // QC check and transform record seqan::readRecord(record, this->bam_file) ; if(not CorrelationMatrixCreator::is_good_pair(record) or not this->is_valid_chromosome(record)) { continue ; } GenomeRegion target ; try { target = GenomeRegion::constructFragment(record, this->bam_file) ; } catch(std::invalid_argument& e) { // connect to cerr to write in SAM seqan::BamFileOut samFileOut(seqan::context(this->bam_file), std::cerr, seqan::Sam()) ; std::cerr << "std::invalid_argument caught! 
could not use " "this record as fragment: " << std::endl ; writeRecord(samFileOut, record) ; std::cerr << "message was : " << e.what() << std::endl << std::endl ; continue ; } // upstream -> continue if(target < region) { continue ; } // overlap -> store else if(target | region) { if(this->method == CorrelationMatrixCreator::methods::FRAGMENT_CENTER) { target = GenomeRegion::constructFragmentCenter(record, this->bam_file) ; if(target | region) { this->target_list_fw.push_back(target) ; } } else { this->target_list_fw.push_back(target) ; } } // downstream -> stop else if(target > region) { // std::cerr << std::endl ; done = true ; } } // std::cerr << "to_downstream_fragment END" << std::endl ; } void CorrelationMatrixCreator::clear_target_lists() { this->target_list_fw.clear() ; this->target_list_rv.clear() ; } /* void CorrelationMatrixCreator::remove_upstream_targets(const GenomeRegion& region) { // forward targets auto iter_fw = this->target_list_fw.cbegin() ; while(iter_fw != this->target_list_fw.end()) { // remove upstream reads if(*iter_fw < region) { iter_fw = this->target_list_fw.erase(iter_fw) ; } // keep overlapping reads, don't stop here else if(*iter_fw | region) { iter_fw++ ; } // stop at first read downstream else { break ; } } // reverse targets auto iter_rv = this->target_list_rv.cbegin() ; while(iter_rv != this->target_list_rv.end()) { // remove upstream reads if(*iter_rv < region) { iter_rv = this->target_list_rv.erase(iter_rv) ; } // keep overlapping reads else if(*iter_rv | region) { iter_rv++ ; } // stop at first read downstream else { break ; } } } */ void CorrelationMatrixCreator::update_count_matrix(size_t row_index) { // forward targets for(const auto& iter : this->target_list_fw) { auto bin_start_end = CorrelationMatrixCreator:: get_bin_indices(iter, this->matrix_bins[row_index]) ; for(int j=bin_start_end.first; jmatrix_counts[row_index][j] += + { this->matrix_counts(row_index,j) += iter.overlap_len(this->matrix_bins[row_index][j]) ; } } // 
reverse targets for(const auto& iter : this->target_list_rv) { auto bin_start_end = CorrelationMatrixCreator:: get_bin_indices(iter, this->matrix_bins[row_index]) ; for(int j=bin_start_end.first; jmatrix_counts[row_index][j] += + { this->matrix_counts(row_index,j) += iter.overlap_len(this->matrix_bins[row_index][j]) ; } } } /* void CorrelationMatrixCreator::update_count_matrix_naive(size_t row_index) { // forward targets for(const auto& iter : target_list_fw) { for(size_t j=0; jmatrix_counts[0].size(); j++) { this->matrix_counts[row_index][j] += iter.overlap_len(this->matrix_bins[row_index][j]) ; } } // reverse targets for(const auto& iter : target_list_rv) { for(size_t j=0; jmatrix_counts[0].size(); j++) { this->matrix_counts[row_index][j] += iter.overlap_len(this->matrix_bins[row_index][j]) ; } } } */ diff --git a/src/GenomicTools/CorrelationMatrixCreator.hpp b/src/GenomicTools/CorrelationMatrixCreator.hpp index 502616f..521f781 100644 --- a/src/GenomicTools/CorrelationMatrixCreator.hpp +++ b/src/GenomicTools/CorrelationMatrixCreator.hpp @@ -1,180 +1,180 @@ #ifndef CORRELATIONMATRIXCREATOR_HPP #define CORRELATIONMATRIXCREATOR_HPP #include #include #include #include // BamFileIn #include // BedFileIn #include -#include +#include /*! * \brief The CorrelationMatrixCreator class allows * to create correlation matrices. * A correlation matrix contains the number of target * mapped at different positions around a set of * reference positions. * This class will read the reference positions from * a BED file and the targets from a BAM file. For each * reference, the region center is computed and then a * region covering the interval [from,to] is build * around the middle and divided into equally sized * bins. Finally, each bin is assigned the number of * target present in the BAM file that are mapped at * that position. * The final matrix contains one row per reference, * with the number of targets counted at each possible * position (bin). relative to this reference. 
*/ class CorrelationMatrixCreator: public ReadMatrixCreator { public: CorrelationMatrixCreator() = delete ; /*! * \brief Constructs an object to build a * correlation matrix. * \param bed_file_path the path to the file containing * the references. * \param bam_file_path the path to the file containing * the targets. * \param bai_file_path the path to index file of the bam * file containing the targets. * \param from the upstream most relative position * to consider around the references. It may * be changed to make sure that the central bin * is centered on +/- 0. * \param to the dowmstream most relative position * to consider around the references. It may * be changed to make sure that the central bin * is centered on +/- 0. * \param bin_size the bin size in base pair. * \param method how the targets should be counted. * READ all the positions inside the reads are * counted. * READ_ATAC only the +4bp position of +strand reads * and the -5bp of -strand reads are counted. It * correspond to the insertion position in ATAC-seq * data. * FRAGMENT all the positions within fragments (the * genome segment between a pair of reads, reads * included) are counted. * FRAGMENT_CENTER only the central position of the * fragements (the genome segment between a pair of * reads, reads included) are counted. */ CorrelationMatrixCreator(const std::string& bed_file_path, const std::string& bam_file_path, const std::string& bai_file_path, int from, int to, int bin_size, CorrelationMatrixCreator::methods method) ; /*! * Destructor. */ virtual ~CorrelationMatrixCreator() ; /*! * \brief Computes the matrix and returns it. * \return the count matrix. */ - virtual matrix2d_i create_matrix() override ; + virtual Matrix2D create_matrix() override ; protected: /*! * \brief Seek in the BAM file right before the last * record upstream the given region. The margin * parameters allows to modify the region start * value. 
* To read a record within the region, a read * operation is required to get ride of the * record right * \param region the region in front of which the * pointer is desired. * \param margin * which streams in the stream vectors to use. * \return whether the reading pointer could be moved * to the desired position. */ bool jump_upstream(const GenomeRegion& region, int margin) ; /*! * \brief A generic routine that reads the following records * until finding the first one located downstream the region * of interest (the definition of the first target downstream * the region of interest depends if READ/READ_ATAC/FRAGMENT * or FRAGMENT_CENTER is set as method). * All record overlapping the region of interest are stored * in the target lists. * The reading pointer is supposed to be located * upstream the region of interest. If this is note the case, * the method will read records until reaching the end of * the file. * \param region the region of interest. */ void to_downstream_target(const GenomeRegion& region) ; /*! * \brief The routine that reads the following records * until finding the first one located downstream the region * of interest if READ or READ_ATAC is set as method. * All record overlapping the region of interest are stored * in the target lists. * The reading pointer is supposed to be located * upstream the region of interest. If this is note the case, * the method will read records until reaching the end of * the file. * \param region the region of interest. */ void to_downstream_read(const GenomeRegion& region) ; /*! * \brief The routine that reads the following records * until finding the first one located downstream the region * of interest if FRAGMENT or FRAGMENT_CENTER is set as * method. * All record overlapping the region of interest are stored * in the target lists. * The reading pointer is supposed to be located * upstream the region of interest. If this is note the case, * the method will read records until reaching the end of * the file. 
* \param region the region of interest. */ void to_downstream_fragment(const GenomeRegion& region) ; /*! * \brief Clear the content of the target lists. */ void clear_target_lists() ; /*! * \brief Update the given row of the count matrix with * the content of the target lists. * \param matrix_row_index the index of the row, in the * count matrix. */ void update_count_matrix(size_t row_index) ; /*! * \brief A buffers containing the * target mapped on the forward strand. * Target without strand (fragments) * are also stored in this list. */ std::list target_list_fw ; /*! * \brief A buffers containing the * target mapped on the reverse strand. */ std::list target_list_rv ; } ; #endif // CORRELATIONMATRIXCREATOR_HPP diff --git a/src/GenomicTools/MatrixCreator.cpp b/src/GenomicTools/MatrixCreator.cpp index 367c755..b0090cc 100644 --- a/src/GenomicTools/MatrixCreator.cpp +++ b/src/GenomicTools/MatrixCreator.cpp @@ -1,41 +1,41 @@ #include #include #include // BedFileIn #include #include -#include +#include MatrixCreator::MatrixCreator(const std::string& bed_file_path, int from, int to) : bed_path(bed_file_path), bed_file(), from(from), to(to), matrix_counts() {} int MatrixCreator::get_center_pos(const seqan::BedRecord& bed_line) { int region_len = bed_line.endPos - bed_line.beginPos ; int region_mid = bed_line.beginPos + (region_len / 2) ; return region_mid ; } MatrixCreator::~MatrixCreator() { this->close_bed_file() ; } void MatrixCreator::open_bed_file() { if(not seqan::open(this->bed_file, this->bed_path.c_str())) { char msg[4096] ; sprintf(msg, "cannot open %s", this->bed_path.c_str()) ; throw std::runtime_error(msg) ; } } void MatrixCreator::close_bed_file() { seqan::close(this->bed_file) ; } diff --git a/src/GenomicTools/MatrixCreator.hpp b/src/GenomicTools/MatrixCreator.hpp index 7fac5bf..746a586 100644 --- a/src/GenomicTools/MatrixCreator.hpp +++ b/src/GenomicTools/MatrixCreator.hpp @@ -1,103 +1,103 @@ #ifndef MATRIXCREATOR_HPP #define MATRIXCREATOR_HPP #include 
#include // BedFileIn, BedRecord #include -#include +#include /*! * \brief The MatrixCreator class is a base class * to be derived by classes that are dedicated to * construct data matrices which rows contains * a signal at different positions (columns) in * this given region. */ class MatrixCreator { public: /*! * \brief Returns the central position of a bed region. * \param bed_line the region of interest. * \return the position of the center. */ static int get_center_pos(const seqan::BedRecord& bed_line) ; public: /*! * \brief Constructs an object. * \param bed_file_path the path to the bed file * containing the coordinates of the regions of * interest. * \param from the downstream most position * to consider, relative to a set of genomic * positions. * \param to the upstream most position to * consider, relative to a set of genomic * positions */ MatrixCreator(const std::string& bed_file_path, int from, int to) ; /*! * Destructor. */ virtual ~MatrixCreator() ; /*! * \brief Creates and return the count matrix. * \return the count matrix. */ - virtual matrix2d_i create_matrix() = 0 ; + virtual Matrix2D create_matrix() = 0 ; protected: /*! * \brief Opens the bed file. * \throw std::runtime_error if the file cannot * be open. */ void open_bed_file() ; /*! * \brief Closes the bed file. * Does nothing if already closed. */ void close_bed_file() ; /*! * \brief Bed file path. */ std::string bed_path ; /*! * \brief An input stream to the * bed file. * Use open_bed_file() to open the stream * and close_bed_file() to close it. */ seqan::BedFileIn bed_file ; /*! * \brief The smallest relative coordinate from the region * center to consider (included). */ int from ; /*! * \brief The biggest relative coordinate from the region * center to consider (not included). */ int to ; /*! * \brief A matrix containing the number of targets * found at each position around each reference. * This is the data structure to fill. 
*/ - matrix2d_i matrix_counts ; + Matrix2D matrix_counts ; } ; #endif // MATRIXCREATOR_HPP diff --git a/src/GenomicTools/ReadMatrixCreator.hpp b/src/GenomicTools/ReadMatrixCreator.hpp index fa57760..424ec1c 100644 --- a/src/GenomicTools/ReadMatrixCreator.hpp +++ b/src/GenomicTools/ReadMatrixCreator.hpp @@ -1,258 +1,258 @@ #ifndef READMATRIXCREATOR_HPP #define READMATRIXCREATOR_HPP #include #include #include // std::pair, std::make_pair() #include // BedFileIn #include // BamFileIn, BamAlignmentRecord #include -#include + class ReadMatrixCreator : public MatrixCreator { public: /*! * \brief A list of values indicating how the data * should be handled when counting the number of * fragments mapped in a given bin. * * FRAGMENT : all positions within a fragment are * accounted for and attributed to the * corresponding bins : * bin1 bin2 * ----|-------|-------|------------> genome * ------- ------- fragments * --> <-- --> <-- pair of reads * ||||| |||||| scoring positions * bin1 gets a score of 5 and bin2 a * score of 6. * * FRAGMENT_CENTER : only the central position * within a fragment is accounted for and * attributed to the corresponding bin : * * bin1 bin2 * ----|-------|-------|------------> genome * ------- ------- fragments * --> <-- --> <-- pair of reads * | | scoring positions * bin1 gets a score of 1 and bin2 also. * * READ : all positions within a read are * accounted for and attributed to the * corresponding bins : * bin1 bin2 * ----|-------|-------|------------> genome * ------- ------- fragments * --> <-- --> <-- reads * | ||| ||| ||| scoring positions * bin1 gets a score of 4 and bin2 a * score of 6. * * READ_ATAC : only the shifted start * of the reads are used. Additionally, the * start position is shifted by +4bp(towards * the right) for reads on the + strand and * -5bp for reads on the - strand (towards the * left). These positions indicate the insertion * position in ATAC-seq data. 
* bin1 bin2 * ----|-------|-------|------------> genome * ------- ------- fragments * --> <-- --> <-- reads * | | | scoring positions * bin1 gets a score of 1 and bin2 a * score of 2. */ enum methods {FRAGMENT=0, FRAGMENT_CENTER, READ, READ_ATAC} ; public: /*! * \brief Computes which bins (from a contiguous * range of bins) are overlapped by a given target * and returns two indices corresponding to : * i) the index of the 1st bin overlapped by the * target * ii) the index of the past last bin overlapepd * by the target. * If the target does not overlapp any bin (it is * located upstream the 1st bin, downstream the * last bin or on a different chromosome), the * index pair 0,0 is returned. * Thus, in any case, a loop of the type * for(i=first,i get_bin_indices(const GenomeRegion& target, const std::vector& bins) ; /*! * \brief Checks that the read is i) is mapped * , ii) passes QC and iii) is not a duplicate, * based on the flag value. * \param read the read of interest. * \return whether the read passes the above tests. */ bool is_good_read(const seqan::BamAlignmentRecord& read) ; /*! * \brief Checks that the read is i) a good read, ii) * a paired read, iii) proplery aligned, iv) the 1st * of the pair based on the flag values and that * v) they forms a proper fragment with its mate mate * (both read should point toward one other). * \param read the read of interest. * \return whether the read and its mate form a proper * fragment. */ bool is_good_pair(const seqan::BamAlignmentRecord& read) ; public: ReadMatrixCreator() = delete ; /*! * \brief Constructs an object to create * a genomic count matrix. * \param bed_file_path the path to the file containing * the references. * \param bam_file_path the path to the file containing * the targets. * \param bai_file_path the path to index file of the bam * file containing the targets. * \param from the downstream most position * to consider, relative to a set of genomic * positions. 
* \param to the upstream most position to * consider, relative to a set of genomic * positions * \param bin_size the size of the bins in * which the regions encompassing the set * of genomic positions will be broken * into. * \param method how the sequenced fragments * should be consider when assigning counts * to the bins. */ ReadMatrixCreator(const std::string& bed_file_path, const std::string& bam_file_path, const std::string& bai_file_path, int from, int to, int bin_size, ReadMatrixCreator::methods method) ; /*! * Destructor. */ virtual ~ReadMatrixCreator() ; protected: /*! * \brief Binarize the given range [from,to] into * equal sized bins having the specified size. * The bin coordinates are stored in bin_coord as * pairs of [start,end) coordinates. One bin is * centered on +/- 0. * */ void compute_relative_bin_coord() ; /*! * \brief Checks whether a record has a valid chromosome, * that is whether this chromosome has been found in the * bed file has well. * \param record a record from the bam file. * \return whether the record chromosome is valid. */ bool is_valid_chromosome(const seqan::BamAlignmentRecord& record) ; /*! * \brief Opens the bam file. * \throw std::runtime_error if the file cannot * be open. */ void open_bam_file() ; /*! * \brief Opens the bam index file. * \throw std::runtime_error if the file cannot * be open. */ void open_bai_file() ; /*! * \brief Closes the bam file. * Does nothing if already closed. */ void close_bam_file() ; /*! * \brief The bin size. */ int bin_size ; /*! * \brief How to consider the sequenced fragments when computing * the bin values. */ ReadMatrixCreator::methods method ; /*! * \brief The relative bin coordinates, compared to a given * position. Each bin has a pair [from,to) where is the * 1st position within the bin and is the 1st position * after the bin. One bin is centered on +/- 0. */ std::vector> relative_bin_coord ; /*! * \brief Bam file path. */ std::string bam_path ; /*! * \brief Bam index file path. 
*/ std::string bai_path ; /*! * \brief An input stream to the * bam file. * Use open_bam_file() to open the stream * and close_bam_file() to close it. */ seqan::BamFileIn bam_file; /*! * \brief An input stream to the * bam index file. * Use open_bai_file() to open the stream * and close_bai_file() to close it. */ seqan::BamIndex bai_file ; /*! * \brief A map containing the valid chromsome * names as keys (as find in the bed file) and * their indices (as found in the BAM header) * as values. */ std::unordered_map chrom_map_names ; /*! * \brief A vector containing containing, * for each reference, the coordinates of * the genomic region covered by the bins. */ std::vector> matrix_bins ; } ; #endif // READMATRIXCREATOR_HPP diff --git a/src/GenomicTools/SequenceMatrixCreator.cpp b/src/GenomicTools/SequenceMatrixCreator.cpp index 5685fb3..837cf73 100644 --- a/src/GenomicTools/SequenceMatrixCreator.cpp +++ b/src/GenomicTools/SequenceMatrixCreator.cpp @@ -1,115 +1,113 @@ #include #include -#include #include // std::invalid_argument, std::runtime_error #include // std::make_pair(), std::move() #include #include // BedFileIn, BedRecord #include // seqan::SeqFileIn #include - +#include SequenceMatrixCreator::SequenceMatrixCreator(const std::string& bed_file_path, const std::string& fasta_file_path, int from, int to) : MatrixCreator(bed_file_path, from, to), fasta_path(fasta_file_path), fasta_file() { seqan::BedRecord bed_line ; // compute number of regions this->open_bed_file() ; size_t n_row = 0 ; size_t n_col = to - from + 1 ; while(not seqan::atEnd(this->bed_file)) { seqan::readRecord(bed_line, this->bed_file) ; n_row++ ; } this->close_bed_file() ; // create the count matrix // init to 'N' because if a part of the matrix // cannot be filled, it wil contain stretches of // 'N' - this->matrix_counts = matrix2d_i(n_row, - vector_i(n_col, dna::char_to_int('N'))) ; + this->matrix_counts = Matrix2D(n_row, n_col, dna::char_to_int('N')) ; } 
SequenceMatrixCreator::~SequenceMatrixCreator() { this->close_fasta_file() ; // bed file closed in ~MatrixCreator() } -matrix2d_i SequenceMatrixCreator::create_matrix() +Matrix2D SequenceMatrixCreator::create_matrix() { std::unordered_map seq_map ; // read the fasta file and store all the sequences this->open_fasta_file() ; while(not seqan::atEnd(this->fasta_file)) { seqan::CharString record_id ; seqan::Dna5String record_seq ; seqan::readRecord(record_id, record_seq, this->fasta_file) ; std::string id = seqan::toCString(record_id) ; // store it if(seq_map.find(id) == seq_map.end()) { seq_map.insert(std::make_pair(std::move(id), std::move(record_seq))) ; } else { char msg[4096] ; sprintf(msg, "Error! header %s found several times in %s", id.c_str(), this->fasta_path.c_str()) ; throw std::runtime_error(msg) ; } } this->close_fasta_file() ; // fill the matrix this->open_bed_file() ; size_t i=0 ; seqan::BedRecord bed_line ; while(not seqan::atEnd(this->bed_file)) { seqan::readRecord(bed_line, this->bed_file) ; std::string region_chr = seqan::toCString(bed_line.ref) ; // get sequence [from, to) int region_mid = MatrixCreator::get_center_pos(bed_line) ; int region_start = std::max(0, region_mid + from) ; int region_end = region_mid + to + 1 ; auto iter = seq_map.find(region_chr) ; if(iter == seq_map.end()) { char msg[4096] ; sprintf(msg, "Error! 
%s sequence cannot be found in %s", region_chr.c_str(), this->fasta_path.c_str()) ; throw std::runtime_error(msg) ; } else { // auto& seq_name = iter->first ; auto& seq = iter->second ; for(int j_seq=region_start, j_mat=0; j_seqsecond); j_seq++, j_mat++) - { this->matrix_counts[i][j_mat] = dna::char_to_int(seq[j_seq]) ; } + { this->matrix_counts(i,j_mat) = dna::char_to_int(seq[j_seq]) ; } } i++ ; } this->close_bed_file() ; return this->matrix_counts ; } void SequenceMatrixCreator::open_fasta_file() { if(not seqan::open(this->fasta_file, this->fasta_path.c_str())) { char msg[4096] ; sprintf(msg, "cannot open %s", this->fasta_path.c_str()) ; throw std::runtime_error(msg) ; } } void SequenceMatrixCreator::close_fasta_file() { seqan::close(this->fasta_file) ; } diff --git a/src/GenomicTools/SequenceMatrixCreator.hpp b/src/GenomicTools/SequenceMatrixCreator.hpp index f298b09..e71dc88 100644 --- a/src/GenomicTools/SequenceMatrixCreator.hpp +++ b/src/GenomicTools/SequenceMatrixCreator.hpp @@ -1,65 +1,65 @@ #ifndef SEQUENCEMATRIXCREATOR_HPP #define SEQUENCEMATRIXCREATOR_HPP #include #include // seqan::SeqFileIn #include -#include +#include class SequenceMatrixCreator : public MatrixCreator { public: SequenceMatrixCreator(const std::string& bed_file_path, const std::string& fasta_file_path, int from, int to) ; /*! * \brief Destructor */ virtual ~SequenceMatrixCreator() ; /*! * \brief Computes the matrix and returns it. * \return the sequence matrix. * \throw std::runtime_error if two sequences * have the same header in the fasta file or * if a sequence/chromosome name present * in the bed cannot be found as sequence * header in the fasta file. */ - virtual matrix2d_i create_matrix() override ; + virtual Matrix2D create_matrix() override ; protected: /*! * \brief Opens the fasta file. * \throw std::runtime_error if the file cannot * be open. */ void open_fasta_file() ; /*! * \brief Closes the fasta file. * \throw std::runtime_error if the file cannot * be open. 
*/ void close_fasta_file() ; /*! * \brief Fasta file path. */ std::string fasta_path ; /*! * \brief An input stream to the * fasta file. * Use open_fasta_file() to open the stream * and close_fasta_file() to close it. */ seqan::SeqFileIn fasta_file ; } ; #endif // SEQUENCEMATRIXCREATOR_HPP diff --git a/src/Matrix/Matrix.hpp b/src/Matrix/Matrix.hpp index fa13945..835883c 100644 --- a/src/Matrix/Matrix.hpp +++ b/src/Matrix/Matrix.hpp @@ -1,654 +1,653 @@ #ifndef MATRIX_HPP #define MATRIX_HPP #include #include // accumulate() #include #include // setw(), setprecision(), fixed #include // out_of_range, invalid_argument #include // swap()f /*! * \brief The Matrix class is a generic class to store data in a matrix. * The matrix dimensionality can be any value : 1 is a vector, 2 is a regular * 2D matrix, 3 is a 3D matrix, etc. * * In order to store the data properly and to perform all operations smoothly, the * internal representation format differs from the "usual format". That is : the user * provides coordinates as (x,y,z,...) where x referes to the row number, y to * the column number, z the the z slice, etc. * Internally however, x corresponds to the column number and y to the row number. * Every other dimension has the same meaning. * * Internal representation : * * Here is an example of a 2x3 matrix (2D) * * {0,1,2,3,4,5} vector is turned to * X * ----------> * 0 1 2 | * 3 4 5 | Y * \|/ * * dimensions are stored as {nx, ny} which corresponds to {ncol, nrow}. Coordinates * are given using the universal format coord=(x,y) which are interpreted as {row, col}. * Thus a simple swap(coord[0],coord[1]) should be performed to ensurethat the user given * coordinates can be used in this referencial. 
* * * Here is an example of a 2x3x2x2 matrix(4D) * {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23} is turned to * * X * -----------> | | * 0 1 2 | | | * 3 4 5 | Y | | * \|/ | Z | * 6 7 8 | | | * 9 10 11 | Y | | * \|/ \|/ | * | A * 12 13 14 | | | * 15 16 17 | Y | | * \|/ | Z | * 18 19 20 | | | * 21 22 23 | Y | | * \|/ \|/ \|/ * * dimensions are stored as {nx, ny, nz, na} which corredponds to {ncol, nrow, nz, na}. * Coordinates are given using the universal format coord=(x,y,z,a) which are interpreted * as {row, col, z, a}. Thus a simple swap(coord[0],coord[1]) should be performed to ensure * that the user given coordinates can be used in this referencial. * * */ template class Matrix { public: // constructors Matrix() = default ; /*! * \brief Constructs an matrix with the given dimension with * 0 values. * \param dim the dimensions. */ Matrix(const std::vector& dim) ; /*! * \brief Constructs a matrix with the given dimensions and * initialize the values to the given value. * \param dim the dimensions. * \param value the value to initialize the matrix content * with. */ Matrix(const std::vector& dim, T value) ; /*! * \brief Copy constructor. * \param other the matrix to copy. */ Matrix (const Matrix& other) ; /*! * \brief Destructor. */ virtual ~Matrix() = default ; // methods /*! * \brief Gets the element at the given offset. * \param offset the offset of the element to get. * \throw std::out_of_range exception if the offset * is out of range. * \return the element. */ T get(size_t offset) const ; /*! * \brief Gets the element at the given coordinates. * \param coord the coordinates of the element to get. * \throw std::out_of_range exception if the coordinates * are out of range. * \return the element. */ T get(const std::vector& coord) const ; /*! * \brief Sets the element at the given offset * to the given value. * \param offset the offset of the element to set. * \param value the new value. 
* \throw std::out_of_range exception if the offset * is out of range. */ void set(size_t offset, T value) ; /*! * \brief Sets the element at the given coordinates * to the given value. * \param coord the coordinates of the element to set. * \param value the new value. * \throw std::out_of_range exception if the coordinates * are out of range. */ void set(const std::vector& coord, T value) ; /*! * \brief Gets the matrix dimensions. * \return the dimensions. */ std::vector get_dim() const ; /*! * \brief Gets the data vector. * \return a a vector containing the data. */ std::vector get_data() ; /*! * \brief Gets the number of dimensions (the length * of the dimension vector). * \return the number of dimensions */ size_t get_dim_size() const ; /*! * \brief Gets the number of elements contained in the * matrix. * \return the number of element contained in the * matrix. */ size_t get_data_size() const ; /*! * \brief Returns the partial products of the dimensions. * \return the partial products of the dimensions. */ std::vector get_dim_product() const ; /*! * \brief Produces a nice representation of the matrix on the given * stream. * \param stream the stream. * \param precision the rounding precision. * \param width the column width in number of characters. * \param sep the character separator. */ virtual void print(std::ostream& stram, size_t precision=4, size_t width=8, char sep=' ') const ; // operator /*! * \brief Assignment operator. * \param other an other matrix to copy the values from. * \return a reference to the current instance. */ Matrix& operator = (const Matrix& other) ; /*! * \brief Adds value to each element. * \param value the value to add. * \return a reference to the instance. */ Matrix& operator += (T value) ; /*! * \brief Substracts value to each element. * \param value the value to substract. * \return a reference to the instance. */ Matrix& operator -= (T value) ; /*! * \brief Multiplies each element by value. 
* \param value the value to multiply the elements by. * \return a reference to the instance. */ Matrix& operator *= (T value) ; /*! * \brief Divides each element by value. * \param value the value to multiply the elements by. * \throw std::invalid_argument if value is 0. * \return a reference to the instance. */ Matrix& operator /= (T value) ; /*! * \brief Comparison operator, returns true if * both matrices are identical, that is do not * have the same data and dimensions. * \param other an other matrix. * \return true if both matrices have the same * data and dimensions. */ bool operator == (const Matrix& other) const ; /*! * \brief Comparison operator, returns true if * both matrices are different, that is do not * have the same data and dimensions. * \param other an other matrix. * \return true if both matrices are different. */ bool operator != (const Matrix& other) const ; /*! * \brief Returns a reference to the corrresponding * element. This method does not perform any check on * the coordinates. * \param coord coord the coordinates of the element to get. * \return a reference to this element. */ T& operator () (const std::vector& coord) ; /*! * \brief Returns a const reference to the corrresponding * element. This method does not perform any check on * the coordinates. * \param coord coord the coordinates of the element to get. * \return a const reference to this element. */ const T& operator () (const std::vector& coord) const ; protected: // methods /*! * \brief Computes the partial dimension products and fills * this->dim_prod according to the current values of * this->_dim and this->dim_size. */ void compute_dim_product() ; /*! * \brief Given a vector of at least 2 dimensional coordinates, * it simply swaps the elements at index 0 (row number) and 1 * (column number) to make them fit the x,y,... matrix * reprensetation (x:number of columns, y:number of rows). * \param coord a vector of coordinates (row, column, ...). 
* \return a vector of coordinates corresponding to (x,y,...). */ std::vector swap_coord(const std::vector& coord) const ; /*! * \brief Complementary function of convert_coord(). Given * a vector of coordinates in (x,y,...) format, it turns it * into (row,col,...) format. * \param coord a vector of coordinates (x,y, ...). * \return a vector of coordinates corresponding to (row,col,...). */ std::vector convert_coord_back(const std::vector& coord) const ; /*! * \brief Checks whether a given offset is a valid offset or * whether it is out of range. * \param offset the offset to check. * \return whether the offset is valid. */ bool is_valid(size_t offset) const ; /*! * \brief Checks whether coordinates in (x,y,...) format are * valid or whether they are out of range. * \param offset the offset to check. * \return whether the offset is valid. */ bool is_valid(const std::vector& coord) const ; /*! * \brief Converts a vector of VALID (x,y,...) coordinates to a * the corresponding offset allowing to get an element in the * data vector. * If the coordinate vector has a (row, column, ...) format, the * result will be wrong. * \param coord a vector of coordinates with (x,y,...) format. * \return the corresponding offset. */ size_t convert_to_offset(const std::vector& coord) const ; /*! * \brief Complementary function of convert_to_offset(). Given an * offset, this function returns the corresponding coordinate * vector in (x,y,...) format. * \param offset a given offset. * \return the corresponding vector of (x,y,..) coordinates. */ std::vector convert_to_coord(size_t offset) const ; // fields /*! * \brief The dimensions values. */ std::vector _dim ; /*! * \brief Stores the data. */ std::vector _data ; /*! * \brief The number of dimensions. */ size_t _dim_size ; /*! * \brief The number of data elements stored. */ size_t _data_size ; /*! * \brief Contains the partial product of the dimensions. 
That is, * the ith element contains the product of all the i-1 precedent * dimensions : * element 0 : 1, element 1 : x, element 2 : x*y, element 3 : x*y*z, * and so one. * This is used for coordinates to offset and offset to coordinates * conversions. */ std::vector _dim_prod ; } ; // operators /*! * \brief Addition operator. * \param m the matrix of interest * \param value the value to add to each element. * \return the resulting matrix. */ template const Matrix operator + (Matrix m, T value) { Matrix other(m) ; other += value ; return other ; } /*! * \brief Substraction operator * \param m the matrix of interest. * \param value the value to substract to each element. * \return the resulting matrix. */ template const Matrix operator - (Matrix m, T value) { Matrix other(m) ; other -= value ; return other ; } /*! * \brief Multiplication operator. * \param m the matrix of interest. * \param value the value to multiply each elements by. * \return the resulting matrix. */ template const Matrix operator * (Matrix m, T value) { Matrix other(m) ; other *= value ; return other ; } /*! * \brief Division operator. * \param m the matrix of interest. * \param value the value to divide each elements by. * \throw std::invalid_argument if value is 0. * \return the resulting matrix. */ template const Matrix operator / (Matrix m, T value) { if(value == static_cast(0)) { throw std::invalid_argument("division by 0!") ; } Matrix other(m) ; other /= value ; return other ; } /*! * \brief Sends a representation of the matrix to the stream. * \param stream the stream of interest. * \param m the matrix of interest. * \return a reference to the stream. 
*/ template std::ostream& operator << (std::ostream& stream, const Matrix& m) { m.print(stream) ; return stream ; } // method implementation template Matrix::Matrix(const std::vector& dim) : Matrix(dim, 0) {} template Matrix::Matrix(const std::vector& dim, T value) { this->_dim_size = dim.size() ; this->_dim = this->swap_coord(dim) ; - this->_data_size = std::accumulate(dim.begin(), dim.end(), 1, std::multiplies()) ; + this->_data_size = std::accumulate(dim.begin(), dim.end(), (size_t)1, std::multiplies()) ; this->_data = std::vector(this->_data_size, value) ; this->compute_dim_product() ; } template Matrix::Matrix(const Matrix &other) { *this = other ; } template T Matrix::get(size_t offset) const { if(not this->is_valid(offset)) { throw std::out_of_range("offset is out of range!") ; } return this->_data[offset] ; } template T Matrix::get(const std::vector& coord) const { std::vector coord_new = this->swap_coord(coord) ; if(not this->is_valid(coord_new)) { throw std::out_of_range("coordinates are out of range!") ; } return this->_data[this->convert_to_offset(coord_new)] ; } template void Matrix::set(size_t offset, T value) { if(not this->is_valid(offset)) { throw std::out_of_range("offset is out of range!") ; } this->_data[offset] = value ; } template void Matrix::set(const std::vector& coord, T value) { std::vector coord_new = this->swap_coord(coord) ; if(not this->is_valid(coord_new)) { throw std::out_of_range("coordinates are out of range!") ; } this->_data[this->convert_to_offset(coord_new)] = value ; } template std::vector Matrix::get_dim() const { return this->swap_coord(this->_dim) ; } template std::vector Matrix::get_data() { return this->_data ; } template size_t Matrix::get_dim_size() const { return this->_dim_size ; } template size_t Matrix::get_data_size() const { return this->_data_size ; } template std::vector Matrix::get_dim_product() const { return this->_dim_prod ; } template void Matrix::print(std::ostream& stream, size_t precision, size_t width, 
char sep) const { stream.setf(std::ios::left) ; stream << std::setprecision(precision) << std::fixed ; for(size_t i=0; iget_data_size(); i++) { stream << std::setw(width) << this->get(i) << sep ; } } template Matrix& Matrix::operator = (const Matrix& other) -{ - this->_dim = other._dim ; +{ this->_dim = other._dim ; this->_dim_size = other._dim_size ; this->_data = other._data ; this->_data_size = other._data_size ; this->_dim_prod = other._dim_prod ; return *this ; } template Matrix& Matrix::operator += (T value) { for(auto& i : this->_data) { i += value ; } return *this ; } template Matrix& Matrix::operator -= (T value) { for(auto& i : this->_data) { i -= value ; } return *this ; } template Matrix& Matrix::operator *= (T value) { for(auto& i : this->_data) { i *= value ; } return *this ; } template Matrix& Matrix::operator /= (T value) { if(value == static_cast(0)) { throw std::invalid_argument("division by 0!") ; } for(auto& i : this->_data) { i /= value ; } return *this ; } template bool Matrix::operator == (const Matrix& other) const { if(&other == this) { return true ; } // check dim if(this->_dim_size != other._dim_size) { return false ; } for(size_t i=0; i_dim_size; i++) { if(this->_dim[i] != other._dim[i]) { return false ; } } // check data if(this->_data_size != other._data_size) { return false ; } for(size_t i=0; i_data_size; i++) { if(this->_data[i] != other._data[i]) { return false ; } } return true ; } template bool Matrix::operator !=(const Matrix& other) const { return not ((*this) == other) ;} template T& Matrix::operator () (const std::vector& coord) { std::vector coord_new = this->swap_coord(coord) ; return this->_data[this->convert_to_offset(coord_new)] ; } template const T& Matrix::operator () (const std::vector& coord) const { std::vector coord_new = this->swap_coord(coord) ; return this->_data[this->convert_to_offset(coord_new)] ; } template void Matrix::compute_dim_product() { this->_dim_prod = std::vector(this->_dim_size, 0) ; 
this->_dim_prod[0] = 1 ; if(this->_dim_size > 1) { this->_dim_prod[1] = this->_dim[0] ; } if(this->_dim_size > 2) { for(size_t i=2; i_dim_size; i++) { this->_dim_prod[i] = this->_dim_prod[i-1]*this->_dim[i-1] ; } } } template std::vector Matrix::swap_coord(const std::vector &coord) const { std::vector coord_new = coord ; // reformat coord = (row,col,...) = (y,y,...) into coord = (col,row,...) = (x,y,...) if(this->_dim_size > 1) { std::swap(coord_new[0], coord_new[1]) ; } return coord_new ; } template bool Matrix::is_valid(size_t offset) const { if(offset > this->_data_size-1) { return false ; } return true ; } template bool Matrix::is_valid(const std::vector& coord) const { if(coord.size() != this->_dim_size) { return false ; } for(size_t i=0; i this->_dim[i]) { return false ; } } return true ; } template size_t Matrix::convert_to_offset(const std::vector& coord) const { size_t offset = 0 ; for(size_t i=0; i_dim_size; i++) { offset += coord[i] * this->_dim_prod[i] ; } return offset ; } template std::vector Matrix::convert_to_coord(size_t offset) const { std::vector coord(this->_dim_size, 0) ; for(int i=this->_dim_size-1; i>=0; i--) { size_t c = offset / this->_dim_prod[i] ; coord[i] = c ; offset -= (this->_dim_prod[i]*c) ; } return coord ; } #endif // MATRIX_HPP diff --git a/src/Matrix/Matrix2D.hpp b/src/Matrix/Matrix2D.hpp index 297344c..53b2f15 100644 --- a/src/Matrix/Matrix2D.hpp +++ b/src/Matrix/Matrix2D.hpp @@ -1,481 +1,556 @@ #ifndef MATRIX2D_HPP #define MATRIX2D_HPP #include #include #include #include // ifstream #include #include // setw(), setprecision(), fixed #include // istringstream #include // runtime_error, out_of_range #define BUFFER_SIZE 4096 /*! The Matrix2D class is a specialisation of the Matrix * class to make work with 2D matrices easier. * * A text format is defined to store such matrices. * In this format, each row is written on a single line * and the values should separated by any blank character * (tab, space, multiple spaces, ...). 
Empty lines are * not allowed. * * ---- start ---- * 1 2 3 * 4 5 6 * 7 8 9 * ----- end ----- * * Constructing a matrix from an empty file (0 bytes or only an EOL char) returns a null * matrix (0x0 dimensions). Writting a null matrix (that is with at least one null * dimension creates an empty file. * */ template class Matrix2D : public Matrix { public: // constructors Matrix2D() = default ; /*! * \brief Constructs a matrix with the given dimensions, * filled with 0 values. * \param nrow the number of rows. * \param ncol the number of columns. */ Matrix2D(size_t nrow, size_t ncol) ; /*! * \brief Constructs a matrix with the given dimensions and * initialize the values to the given value. * \param nrow the number of rows. * \param ncol the number of columns. * \param value the value to initialize the matrix content * with. */ Matrix2D(size_t nrow, size_t ncol, T value) ; /*! * \brief Copy constructor * \param other the matrix to copy the content from. */ Matrix2D(const Matrix2D& other) ; /*! * \brief Constructs a matrix from a text file. A matrix contructed * from an empty file (or a file containing only one EOL char) returns * an empty matrix (null dimensions). * \param file_address the address of the file containing the matrix. * \throw std::runtime_error if anything happen while reading the * file (format error, file not found, etc). */ Matrix2D(const std::string& file_address) ; /*! * \brief Destructor. */ virtual ~Matrix2D() = default ; // methods overloaded in Matrix using Matrix::get ; using Matrix::set ; // methods /*! * \brief Gets the element at the given coordinates. * \param row the row number of the element to set. * \param col the column number of the element to set. * \throw std::out_of_range exception if the coordinates * are out of range. * \return the element. */ T get(size_t row, size_t col) const ; /*! * \brief Sets the element at the given coordinates * to the given value. * \param row the row number of the element to set. 
* \param col the column number of the element to set. * \param value the new value. * \throw std::out_of_range exception if the coordinates * are out of range. */ void set(size_t row, size_t col, T value) ; /*! * \brief Gets the number of rows. * \return the number of rows. */ size_t get_nrow() const ; /*! * \brief Gets the number of columns. * \return the number of columns. */ size_t get_ncol() const ; /*! * \brief Gets the values in the i-th row. * \param i the row of interest. * \throw std::out_of_range if i is out of range. * \return the values in this row. */ std::vector get_row(size_t i) const ; /*! * \brief Gets the values in the i-th column. * \param i the column of interest. * \throw std::out_of_range if i is out of range. * \return the values in this column. */ std::vector get_col(size_t i) const ; /*! * \brief Sets the values of a given rows with the values of a given * vector. * \param i the row of interest. * \param values the new values. * \throw std::out_of_range if i is out of range. * \throw std::invalid_argument if values does not have a length equal * to the number of columns of the matrix. */ void set_row(size_t i, const std::vector& values) ; /*! * \brief Sets the values of a given column with the values of a given * vector. * \param i the column of interest. * \param values the new values. * \throw std::out_of_range if i is out of range. * \throw std::invalid_argument if values does not have a length equal * to the number of rows of the matrix. */ void set_col(size_t i, const std::vector& values) ; /*! * \brief Produces a nice representation of the matrix on the given * stream. * \param stream the stream. * \param precision the rounding precision. * \param width the column width in number of characters. * \param sep the character separator. */ virtual void print(std::ostream& stram, size_t precision=4, size_t width=8, char sep=' ') const override ; // operators /*! * \brief Returns a reference to the corrresponding * element. 
This method does not perform any check on * the coordinates. * \param row the row number of the element to set. * \param col the column number of the element to set. * \return a reference to this element. */ T& operator () (size_t row, size_t col) ; /*! * \brief Returns a const reference to the corrresponding * element. This method does not perform any check on * the coordinates. * \param row the row number of the element to set. * \param col the column number of the element to set. * \return a const reference to this element. */ const T& operator () (size_t row, size_t col) const ; + private: + /*! + * \brief Converts a pair of VALID (x,y) coordinates to a + * the corresponding offset allowing to get an element in the + * data vector. + * \param row the row index. + * \param col the column index. + * \return the corresponding offset. + */ + size_t convert_to_offset(size_t row, size_t col) const ; + + /*! + * \brief Computes and stores the offsets at which + * each row start. + */ + void compute_row_offsets() ; + + /*! + * \brief Computes and stores the offsets at which + * each row start. + */ + void compute_col_offsets() ; + + /*! + * \brief Contains the offsets at which each row starts. + * Each element corresponds to the corresponding rows + * (1st element -> 1st row). + */ + std::vector _row_offsets ; + /*! + * \brief Contains the offsets at which each row starts. + * Each element corresponds to the corresponding rows + * (1st element -> 1st row). + */ + std::vector _col_offsets ; + } ; // operators /*! * \brief Addition operator. * \param m the matrix of interest * \param value the value to add to each element. * \return the resulting matrix. */ template const Matrix2D operator + (Matrix2D m, T value) { Matrix2D other(m) ; m += value ; return m ; } /*! * \brief Substraction operator * \param m the matrix of interest. * \param value the value to substract to each element. * \return the resulting matrix. 
*/ template const Matrix2D operator - (Matrix2D m, T value) { Matrix2D other(m) ; m -= value ; return m ; } /*! * \brief Multiplication operator. * \param m the matrix of interest. * \param value the value to multiply each elements by. * \return the resulting matrix. */ template const Matrix2D operator * (Matrix2D m, T value) { Matrix2D other(m) ; m *= value ; return m ; } /*! * \brief Division operator. * \param m the matrix of interest. * \param value the value to divide each elements by. * \throw std::invalid_argument if value is 0. * \return the resulting matrix. */ template const Matrix2D operator / (Matrix2D m, T value) { if(value == static_cast(0)) { throw std::invalid_argument("division by 0!") ; } Matrix2D other(m) ; other /= value ; return other ; } /*! * \brief Sends a representation of the matrix to the stream. * \param stream the stream of interest. * \param m the matrix of interest. * \return a reference to the stream. */ template std::ostream& operator << (std::ostream& stream, const Matrix2D& m) { m.print(stream) ; return stream ; } // other usefull functions /*! * \brief Produces a transpose of the given matrix. * \param m a matrix. 
*/ template Matrix2D transpose(const Matrix2D& m) ; // method implementation template Matrix2D transpose(const Matrix2D& m) { std::vector dim = m.get_dim() ; size_t nrow = dim[0] ; size_t ncol = dim[1] ; Matrix2D m2(ncol, nrow, 0) ; for(size_t i=0; i Matrix2D::Matrix2D(size_t nrow, size_t ncol) : Matrix2D(nrow, ncol, static_cast(0)) {} template Matrix2D::Matrix2D(size_t nrow, size_t ncol, T value) - : Matrix({nrow, ncol}, value) -{} + : Matrix({nrow, ncol}, value), + _row_offsets(nrow), + _col_offsets(ncol) +{ this->compute_row_offsets() ; + this->compute_col_offsets() ; +} template Matrix2D::Matrix2D(const Matrix2D& other) : Matrix(other) -{} +{ this->_row_offsets = other._row_offsets ; + this->_col_offsets = other._col_offsets ; +} template Matrix2D::Matrix2D(const std::string &file_address) // : Matrix({0,0}) { this->_dim = {0,0} ; this->_data = std::vector() ; this->_dim_size = this->_dim.size() ; this->_data_size = this->_data.size() ; this->_dim_prod = std::vector(this->_dim_size, 0) ; std::ifstream file(file_address, std::ifstream::in) ; if(file.fail()) { char msg[BUFFER_SIZE] ; sprintf(msg, "error! cannot open %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } std::string buffer_str ; std::vector buffer_vec ; T buffer_T ; // read file size_t n_line = 0 ; size_t row_len = 0 ; while(getline(file, buffer_str)) { // check stream status and read content if(file.fail()) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "error! while reading %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } if(buffer_str.size() == 0) { // this file only contains one eol char and should be considered as empty, // -> returns empty matrix not an error if(n_line == 0 and file.peek() == EOF and file.eof()) { break ; } file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "format error! 
while reading %s (empty line)", file_address.c_str()) ; throw std::runtime_error(msg) ; } // parse line buffer_vec.clear() ; std::istringstream buffer_ss(buffer_str) ; while(buffer_ss >> buffer_T) { buffer_vec.push_back(buffer_T) ; } // check for an error which likely indicates that a value could not be // casted into a type T (mixed data types in the file) if(buffer_ss.fail() and not buffer_ss.eof()) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "format error! could not read a line in %s (incompatible data types)", file_address.c_str()) ; throw std::runtime_error(msg) ; } // check that number of column is constant if(n_line == 0) { row_len = buffer_vec.size() ; } else if(buffer_vec.size() != row_len) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "format error! variable number of columns in %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } // update matrix content for(auto i : buffer_vec) { this->_data.push_back(i) ; this->_data_size++ ; } this->_dim[1]++ ; n_line++ ; } file.close() ; this->_dim[0] = row_len ; this->compute_dim_product() ; + + this->_row_offsets = std::vector(this->_dim[1]) ; + this->_col_offsets = std::vector(this->_dim[0]) ; + this->compute_row_offsets() ; + this->compute_col_offsets() ; } template T Matrix2D::get(size_t row, size_t col) const { try { return this->get({row, col}) ; } catch(std::out_of_range& e) { throw e ; } } template void Matrix2D::set(size_t row, size_t col, T value) { try { this->set({row, col}, value) ; } catch(std::out_of_range& e) { throw e ; } } template size_t Matrix2D::get_nrow() const { return this->_dim[1] ; } template size_t Matrix2D::get_ncol() const { return this->_dim[0] ; } template std::vector Matrix2D::get_row(size_t i) const { if(i>=this->get_nrow()) { throw std::out_of_range("row index is out of range!") ; } std::vector row(this->get_ncol()) ; for(size_t j=i*this->get_ncol(), n=0; nget_ncol(); j++, n++) { row[n] = this->_data[j] ; } return row ; } template std::vector 
Matrix2D::get_col(size_t i) const { if(i>=this->get_ncol()) { throw std::out_of_range("column index is out of range!") ; } std::vector col(this->get_nrow()) ; for(size_t j=i, n=0; nget_nrow(); j+=this->get_ncol(), n++) { col[n] = this->_data[j] ; } return col ; } template void Matrix2D::set_row(size_t i, const std::vector& values) { if(i>=this->get_nrow()) { throw std::out_of_range("row index is out of range!") ; } else if(values.size() != this->get_ncol()) { throw std::invalid_argument("the given vector length is not equal to the number of columns!") ; } for(size_t j=i*this->get_ncol(), n=0; nget_ncol(); j++, n++) { this->_data[j] = values[n] ; } } template void Matrix2D::set_col(size_t i, const std::vector& values) { if(i>=this->get_ncol()) { throw std::out_of_range("row index is out of range!") ; } else if(values.size() != this->get_nrow()) { throw std::invalid_argument("the given vector length is not equal to the number of rows!") ; } for(size_t n=0, j=i; nget_nrow(); n++, j+=this->get_ncol()) { this->_data[j] = values[n] ; } } template void Matrix2D::print(std::ostream& stream, size_t precision, size_t width, char sep) const { stream.setf(std::ios::left) ; stream << std::setprecision(precision) << std::fixed ; size_t n = 0 ; size_t n_tot = this->get_nrow()*this->get_ncol() ; for(size_t i=0; iget_nrow(); i++) { for(size_t j=0; jget_ncol(); j++, n++) { stream << std::setw(width) << (*this)(i,j) << sep ; } if(n T& Matrix2D::operator () (size_t row, size_t col) -{ std::vector coord = {col, row} ; - return this->_data[this->convert_to_offset(coord)] ; +{ // std::vector coord = {col, row} ; + // return this->_data[this->convert_to_offset(coord)] ; + return this->_data[this->convert_to_offset(row, col)] ; } template const T& Matrix2D::operator () (size_t row, size_t col) const -{ std::vector coord = {col, row} ; - return this->_data[this->convert_to_offset(coord)] ; +{ // std::vector coord = {col, row} ; + // return this->_data[this->convert_to_offset(coord)] ; + 
return this->_data[this->convert_to_offset(row, col)] ; } + +template +void Matrix2D::compute_row_offsets() +{ for(size_t i=0; i_dim[1]; i++) + { this->_row_offsets[i] = i * this->_dim_prod[1] ; } +} + +template +void Matrix2D::compute_col_offsets() +{ for(size_t i=0; i_dim[0]; i++) + { this->_col_offsets[i] = i * this->_dim_prod[0] ; } +} + +template +size_t Matrix2D::convert_to_offset(size_t row, size_t col) const +{ /* + size_t offset = 0 ; + + for(size_t i=0; i_dim_size; i++) + { offset += coord[i] * this->_dim_prod[i] ; } + + return offset ; + */ + size_t offset = this->_row_offsets[row] + this->_col_offsets[col] ; + return offset ; +} #endif // MATRIX2D_HPP diff --git a/src/Matrix/Matrix3D.hpp b/src/Matrix/Matrix3D.hpp index a812f4b..122027f 100644 --- a/src/Matrix/Matrix3D.hpp +++ b/src/Matrix/Matrix3D.hpp @@ -1,444 +1,536 @@ #ifndef MATRIX3D_HPP #define MATRIX3D_HPP #include #include #include #include #include // setw(), setprecision(), fixed #include // ifstream #include // istringstream #include // runtime_error, out_of_range #include // equal() #define BUFFER_SIZE 4096 /*! * The Matrix3D class is a specialisation of the Matrix * class to make work with 3D matrices more easily. * * A text file format is defined to store such matrices. The specifications are as * follows : * Absolutely NO empty lines are allowed! * The following lines should contain : * * 1st line : a slice header, ',,0' indicates that a slice of the 3rd dimension * is beginning (this is a z slice). * 2nd - Nth line : the firt slice, as a 2d matrix (the exemple below has dimensions 3x4). * N+1th line : a slice header, ',,1' indicates that the 2nd slice is beginning. * N+1th - ... : the second slice * and so on... * * Example of a 3x4x2 3D matrix * ---- start ---- * ,,0 * 1 2 3 4 * 5 6 7 8 * 8 9 10 11 *,,1 * 12 13 14 15 * 16 17 18 19 * 20 21 22 23 * ----- end ----- * * Constructing a matrix from an empty file (0 bytes or only an EOL char) returns a null * matrix (0x0x0 dimensions). 
Writting a null matrix (that is with at least one null * dimension creates an empty file. * */ template class Matrix3D : public Matrix { public: // constructors Matrix3D() = default ; /*! * \brief Constructs a matrix with the given dimensions, * filled with 0 values. * \param dim1 the first dimension. * \param dim2 the second dimension. * \param dim3 the third dimension. */ Matrix3D(size_t dim1, size_t dim2, size_t dim3) ; /*! * \brief Constructs a matrix with the given dimensions and * initialize the values to the given value. * \param dim1 the first dimension. * \param dim2 the second dimension. * \param dim3 the third dimension. * \param value the value to initialize the matrix content * with. */ Matrix3D(size_t dim1, size_t dim2, size_t dim3, T value) ; /*! * \brief Copy constructor * \param other the matrix to copy the content from. */ Matrix3D(const Matrix3D& other) ; /*! * \brief Constructs a matrix from a text file. A matrix contructed * from an empty file (or a file containing only one EOL char) returns * an empty matrix (null dimensions). * \param file_address the address of the file containing the matrix. * \throw std::runtime_error if anything happen while reading the * file (format error, file not found, etc). */ Matrix3D(const std::string& file_address) ; /*! * \brief Destructor. */ virtual ~Matrix3D() = default ; // methods overloaded from Matrix using Matrix::get ; using Matrix::set ; // methods /*! * \brief Gets the element at the given coordinates. * \param dim1 the first dimension coordinate. * \param dim2 the second dimension coordinate. * \param dim3 the third dimension coordinate. * \throw std::out_of_range exception if the coordinates * are out of range. * \return the element. */ T get(size_t dim1, size_t dim2, size_t dim3) const ; /*! * \brief Sets the element at the given coordinates * to the given value. * \param dim1 the first dimension coordinate. * \param dim2 the second dimension coordinate. 
* \param dim3 the third dimension coordinate. * \param value the new value. * \throw std::out_of_range exception if the coordinates * are out of range. */ void set(size_t dim1, size_t dim2, size_t dim3, T value) ; /*! * \brief Produces a nice representation of the matrix on the given * stream. * \param stream the stream. * \param precision the rounding precision. * \param width the column width in number of characters. * \param sep the character separator. */ virtual void print(std::ostream& stream, size_t precision=4 ,size_t width=8, char sep=' ') const override ; // operators /*! * \brief Returns a reference to the corrresponding * element. This method does not perform any check on * the coordinates. * \param dim1 the first dimension coordinate. * \param dim2 the second dimension coordinate. * \param dim3 the third dimension coordinate. * \return a reference to this element. */ - T& operator() (size_t dim1, size_t dim2, size_t dim3) ; + T& operator () (size_t dim1, size_t dim2, size_t dim3) ; /*! * \brief Returns a constant reference to the corrresponding * element. This method does not perform any check on * the coordinates. * \param dim1 the first dimension coordinate. * \param dim2 the second dimension coordinate. * \param dim3 the third dimension coordinate. * \return a constant reference to this element. */ - const T& operator() (size_t dim1, size_t dim2, size_t dim3) const ; + const T& operator () (size_t dim1, size_t dim2, size_t dim3) const ; private: // methods /*! * \brief Checks whether a given string is a slice header * (such as ",,0"), as found in files storing Matrix3D. * \param str the string to check. * \return whether the string is a slice header. */ bool is_header(const std::string& str) const ; + /*! + * \brief Converts a triplet of VALID (dim1, dim2, dim3) coordinates + * to a the corresponding offset allowing to get an element in the + * data vector. + * \param dim1 the index of the 1st dimension slice (row). 
+ * \param dim2 the index of the 2nd dimension slice (column). + * \param dim3 the index of the 3rd dimension slice. + * \return the corresponding offset. + */ + size_t convert_to_offset(size_t dim1, size_t dim2, size_t dim3) const ; + + /*! + * \brief Computes and stores the offsets at which + * each slice on the 1st dimension (row) starts. + */ + void compute_dim1_offsets() ; + + /*! + * \brief Computes and stores the offsets at which + * each slice on the 2nd dimension (column) starts. + */ + void compute_dim2_offsets() ; + + /*! + * \brief Computes and stores the offsets at which + * each slice on the 3rd dimension (3rd dimension + * slice) starts. + */ + void compute_dim3_offsets() ; + + /*! + * \brief Contains the offsets at which each x slice + * starts. Each element corresponds to the corresponding + * x slice (1st element -> 1st x slice (row)). + */ + std::vector _dim1_offsets ; + /*! + * \brief Contains the offsets at which each y slice + * starts. Each element corresponds to the corresponding + * y slice (1st element -> 1st y slice (column)). + */ + std::vector _dim2_offsets ; + /*! + * \brief Contains the offsets at which each x slice + * starts. Each element corresponds to the corresponding + * x slice (1st element -> 1st z slice). + */ + std::vector _dim3_offsets ; } ; // operators /*! * \brief Addition operator. * \param m the matrix of interest * \param value the value to add to each element. * \return the resulting matrix. */ template const Matrix3D operator + (Matrix3D m, T value) { Matrix3D other(m) ; m += value ; return m ; } /*! * \brief Substraction operator * \param m the matrix of interest. * \param value the value to substract to each element. * \return the resulting matrix. */ template const Matrix3D operator - (Matrix3D m, T value) { Matrix3D other(m) ; m -= value ; return m ; } /*! * \brief Multiplication operator. * \param m the matrix of interest. * \param value the value to multiply each elements by. * \return the resulting matrix. 
*/ template const Matrix3D operator * (Matrix3D m, T value) { Matrix3D other(m) ; m *= value ; return m ; } /*! * \brief Division operator. * \param m the matrix of interest. * \param value the value to divide each elements by. * \throw std::invalid_argument if value is 0. * \return the resulting matrix. */ template const Matrix3D operator / (Matrix3D m, T value) { if(value == static_cast(0)) { throw std::invalid_argument("division by 0!") ; } Matrix3D other(m) ; other /= value ; return other ; } /*! * \brief Sends a representation of the matrix to the stream. * \param stream the stream of interest. * \param m the matrix of interest. * \return a reference to the stream. */ template std::ostream& operator << (std::ostream& stream, const Matrix3D& m) { m.print(stream) ; return stream ; } // method implementation template Matrix3D::Matrix3D(size_t dim1, size_t dim2, size_t dim3) : Matrix3D(dim1, dim2, dim3, 0) {} template Matrix3D::Matrix3D(size_t dim1, size_t dim2, size_t dim3, T value) - : Matrix({dim1, dim2, dim3}, value) -{} + : Matrix({dim1, dim2, dim3}, value), + _dim1_offsets(dim1), + _dim2_offsets(dim2), + _dim3_offsets(dim3) +{ this->compute_dim1_offsets() ; + this->compute_dim2_offsets() ; + this->compute_dim3_offsets() ; +} template Matrix3D::Matrix3D(const Matrix3D &other) : Matrix(other) -{} +{ this->_dim1_offsets = other._dim1_offsets ; + this->_dim2_offsets = other._dim2_offsets ; + this->_dim3_offsets = other._dim3_offsets ; +} template Matrix3D::Matrix3D(const std::string &file_address) { this->_dim = {0,0,0} ; this->_data = std::vector() ; this->_dim_size = this->_dim.size() ; this->_data_size = this->_data.size() ; this->_dim_prod = std::vector(this->_dim_size, 0) ; std::ifstream file(file_address, std::ifstream::in) ; if(file.fail()) { char msg[BUFFER_SIZE] ; sprintf(msg, "error! 
cannot open %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } std::string buffer_str ; std::vector buffer_vec ; T buffer_T ; // read file size_t n_line = 0, n_line_data = 0 ; // number of line and of data line read size_t row_len = 0, col_len = 0 ; // length of row and column in nber of values size_t row_len_cur = 0, col_len_cur = 0 ; // current number of values read in row and col while(getline(file, buffer_str)) { if(file.fail()) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "error! while reading %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } // check empty line if(buffer_str.size() == 0) { // this file only contains one eol char and should be considered as empty, // -> returns empty matrix not an error if(n_line == 0 and file.peek() == EOF and file.eof()) { break ; } file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "format error! while reading %s (empty line)", file_address.c_str()) ; throw std::runtime_error(msg) ; } // check whether it is the beginning of a slice // 1st line in file should be one like this if(this->is_header(buffer_str)) { // check that slice have a constant number of rows if(this->_dim[2] == 1) { col_len = col_len_cur ; // this->_dim[0] = row_len ; // this->_dim[1] = col_len ; } else if(col_len_cur != col_len) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "format error! slice have variable dimensions 1 in %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } this->_dim[2]++ ; col_len_cur = 0 ; n_line++ ; continue ; } // 1st line in file should be a header and entering // this block is forbidden if(n_line == 0) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "format error! 
first line is not a slice header in %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } // parse line row_len_cur = 0 ; buffer_vec.clear() ; std::istringstream buffer_ss(buffer_str) ; while(buffer_ss >> buffer_T) { buffer_vec.push_back(buffer_T) ; row_len_cur++ ; } // check for an error which likely indicates that a value could not be // casted into a type T (mixed data types in the file) if(buffer_ss.fail() and not buffer_ss.eof()) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "format error! could not read a line in %s (incompatible data types)", file_address.c_str()) ; throw std::runtime_error(msg) ; } // check that number of column is constant if(n_line_data == 0) { row_len = row_len_cur ; } else if(row_len_cur != row_len) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "format error! slice have variable dimensions 2 in %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } // update matrix content for(auto i : buffer_vec) { this->_data.push_back(i) ; this->_data_size++ ; } col_len_cur++ ; n_line_data++ ; n_line++ ; // update matrix dimensions this->_dim[0] = row_len_cur ; this->_dim[1] = col_len_cur ; } // check dimensions of last slice if(col_len_cur != this->_dim[1]) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "format error! 
slice have variable dimensions in %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } file.close() ; this->compute_dim_product() ; + + this->_dim1_offsets = std::vector(this->_dim[1]) ; + this->_dim2_offsets = std::vector(this->_dim[0]) ; + this->_dim3_offsets = std::vector(this->_dim[2]) ; + this->compute_dim1_offsets() ; + this->compute_dim2_offsets() ; + this->compute_dim3_offsets() ; } template T Matrix3D::get(size_t dim1, size_t dim2, size_t dim3) const { try { return this->get({dim1, dim2, dim3}) ; } catch(std::out_of_range& e) { throw e ; } } template void Matrix3D::set(size_t dim1, size_t dim2, size_t dim3, T value) { try { return this->set({dim1, dim2, dim3}, value) ; } catch(std::out_of_range& e) { throw e ; } } template T& Matrix3D::operator () (size_t dim1, size_t dim2, size_t dim3) -{ std::vector coord = {dim2, dim1, dim3} ; - return this->_data[this->convert_to_offset(coord)] ; -} +{ return this->_data[this->convert_to_offset(dim1, dim2, dim3)] ; } + +template +const T& Matrix3D::operator () (size_t dim1, size_t dim2, size_t dim3) const +{ return this->_data[this->convert_to_offset(dim1, dim2, dim3)] ; } template void Matrix3D::print(std::ostream& stream, size_t precision, size_t width, char sep) const { // if the matrix has at least one 0 dimension (no data), don't do anything if(this->_dim[0]==0 or this->_dim[1]==0 or this->_dim[2]==0) { return ; } stream.setf(std::ios::left) ; stream << std::setprecision(precision) << std::fixed ; std::vector dim = this->get_dim() ; size_t n = 0 ; size_t n_tot = std::accumulate(dim.begin(), dim.end(), 1, std::multiplies()) ; for(size_t z=0; z -const T& Matrix3D::operator () (size_t dim1, size_t dim2, size_t dim3) const -{ std::vector coord = {dim2, dim1, dim3} ; - return this->_data[this->convert_to_offset(coord)] ; -} - - template bool Matrix3D::is_header(const std::string& str) const { if(str[0] == ',' and str[1] == ',' and str.find(',', 2) == std::string::npos) { return true ; } return false ; } 
+template +void Matrix3D::compute_dim1_offsets() +{ for(size_t i=0; i_dim[1]; i++) + { this->_dim1_offsets[i] = i * this->_dim_prod[1] ; } +} + +template +void Matrix3D::compute_dim2_offsets() +{ for(size_t i=0; i_dim[0]; i++) + { this->_dim2_offsets[i] = i * this->_dim_prod[0] ; } +} + +template +void Matrix3D::compute_dim3_offsets() +{ for(size_t i=0; i_dim[2]; i++) + { this->_dim3_offsets[i] = i * this->_dim_prod[2] ; } +} + +template +size_t Matrix3D::convert_to_offset(size_t dim1, size_t dim2, size_t dim3) const +{ /* + size_t offset = 0 ; + + for(size_t i=0; i_dim_size; i++) + { offset += coord[i] * this->_dim_prod[i] ; } + + return offset ; + */ + size_t offset = this->_dim1_offsets[dim1] + + this->_dim2_offsets[dim2] + + this->_dim3_offsets[dim3] ; + return offset ; +} + #endif // MATRIX3D_HPP diff --git a/src/Matrix/Matrix4D.hpp b/src/Matrix/Matrix4D.hpp index d0a280a..28266ce 100644 --- a/src/Matrix/Matrix4D.hpp +++ b/src/Matrix/Matrix4D.hpp @@ -1,594 +1,719 @@ #ifndef MATRIX4D_HPP #define MATRIX4D_HPP #include #include #include #include // runtime_error, out_of_range #include #include // setw(), setprecision(), fixed #include // ifstream #include // sstream #define BUFFER_SIZE 4096 /*! * The Matrix4D class is a specialisation of the Matrix * class to make work with 4D matrices more easily. * * A text file format is defined to store such matrices. The specifications are as * follows : * Absolutely NO empty lines are allowed! * The following lines should contain : * * 1st line : a slice header ',,,0' indicating that a slice of the 4th dimension * is beginning. * 3nd - Nth line : the slice of the 4th dimension. It contains slice in the 3rd dimension * which are 2D matrices separated by headers (',,0' and ',,1', in the * example below, they have 2x3 dimensions). * N+1th line : ',,,1' indicating that the 2nd slice of the 4th dimension is beginning. * and so on... 
* Example * ---- start ---- * ,,,0 * ,,0 * 1 2 3 * 4 5 6 * ,,1 * 7 8 9 * 10 11 12 * ,,,1 * ,,0 * 21 22 23 * 24 25 26 * ,,1 * 27 28 29 * 30 31 32 * ----- end ----- * * Constructing a matrix from an empty file (0 bytes or only an EOL char) returns a null * matrix (0x0x0x0 dimensions). Writting a null matrix (that is with at least one null * dimension creates an empty file. * */ template class Matrix4D : public Matrix { public: // constructors Matrix4D() = default ; /*! * \brief Constructs a matrix with the given dimensions, * filled with 0 values. * \param dim1 the first dimension. * \param dim2 the second dimension. * \param dim3 the third dimension. * \param dim4 the fourth dimension. */ Matrix4D(size_t dim1, size_t dim2, size_t dim3, size_t dim4) ; /*! * \brief Constructs a matrix with the given dimensions and * initialize the values to the given value. * \param dim1 the first dimension. * \param dim2 the second dimension. * \param dim3 the third dimension. * \param dim4 the fourth dimension. * \param value the value to initialize the matrix content * with. */ Matrix4D(size_t dim1, size_t dim2, size_t dim3, size_t dim4, T value) ; /*! * \brief Copy constructor * \param other the matrix to copy the content from. */ Matrix4D(const Matrix4D& other) ; /*! * \brief Constructs a matrix from a text file. A matrix contructed * from an empty file (or a file containing only one EOL char) returns * an empty matrix (null dimensions). * \param file_address the address of the file containing the matrix. * \throw std::runtime_error if anything happen while reading the * file (format error, file not found, etc). */ Matrix4D(const std::string& file_address) ; /*! * \brief Destructor. */ virtual ~Matrix4D() = default ; // methods overloaded from Matrix using Matrix::get ; using Matrix::set ; // methods OK /*! * \brief Gets the element at the given coordinates. * \param dim1 the first dimension coordinate. * \param dim2 the second dimension coordinate. 
* \param dim3 the third dimension coordinate. * \param dim4 the fourth dimension coordinate. * \throw std::out_of_range exception if the coordinates * are out of range. * \return the element. */ T get(size_t dim1, size_t dim2, size_t dim3, size_t dim4) const ; /*! * \brief Sets the element at the given coordinates * to the given value. * \param dim1 the first dimension coordinate. * \param dim2 the second dimension coordinate. * \param dim3 the third dimension coordinate. * \param dim4 the fourth dimension coordinate. * \param value the new value. * \throw std::out_of_range exception if the coordinates * are out of range. */ void set(size_t dim1, size_t dim2, size_t dim3, size_t dim4, T value) ; /*! * \brief Produces a nice representation of the matrix on the given * stream. * \param stream the stream. * \param precision the rounding precision. * \param width the column width in number of characters. * \param sep the character separator. */ virtual void print(std::ostream& stream, size_t precision=4 ,size_t width=8, char sep=' ') const override ; - // operators OK + // operators /*! * \brief Returns a reference to the corrresponding * element. This method does not perform any check on * the coordinates. * \param dim1 the first dimension coordinate. * \param dim2 the second dimension coordinate. * \param dim3 the third dimension coordinate. * \param dim4 the third dimension coordinate. * \return a reference to this element. */ T& operator() (size_t dim1, size_t dim2, size_t dim3, size_t dim4) ; /*! * \brief Returns a reference to the corrresponding * element. This method does not perform any check on * the coordinates. * \param dim1 the first dimension coordinate. * \param dim2 the second dimension coordinate. * \param dim3 the third dimension coordinate. * \param dim4 the third dimension coordinate. * \return a reference to this element. */ const T& operator() (size_t dim1, size_t dim2, size_t dim3, size_t dim4) const ; private: // methods /*! 
* \brief Checks whether a given string is a 3D header * (such as ",,0"), as found in files storing Matrix4D. * \param str the string to check. * \return whether the string is such a slice header. */ bool is_header_3d(const std::string& str) const ; /*! * \brief Checks whether a given string is a 4D header * (such as ",,,0"), as found in files storing Matrix4D. * \param str the string to check. * \return whether the string is such a slice header. */ bool is_header_4d(const std::string& str) const ; /*! * \brief Routine to load 4D matrices from files. * This method reads from a std::ifstream object, * from the current pointer location until i) a 4D * header line is found (such as ',,,1') or ii) until * it cannot read anymore from the stream. All * data are pushed back into the data vector and * the dimensions of the data read are stored into * the dim vector (these data are actually a 3D * matrix). If the method returned because it * found another 4D header, it returns true, false * otherwise. * To read an entire 4D matrix from a file, simply * use this scheme : i) read the 1st 4D header * ii) call this function while it returns true. * \param file_name a reference to a string containing * the address of the file currently read (for exception * messages). * \param file a reference to the std::ifstream to read * from. Obviously, the stream state will be modified as * the method reads from it. However, it will never be * closed by the method. * \param data a reference to an empty vector where the * read data will be pushed back. * \param dim a reference to an empty vector where the * dimensions of the read data will be stored. * \return whether the last piece of data read from the * stream was a 4D header. */ bool get_3d_slice(const std::string& file_name, std::ifstream& file, std::vector& data, std::vector& dim) const ; + /*! 
+ * \brief Converts a quadruplet of VALID (dim1, dim2, dim3, dim4) + * coordinates to a the corresponding offset allowing to get an + * element in the data vector. + * \param dim1 the index of the 1st dimension slice. + * \param dim2 the index of the 2nd dimension slice. + * \param dim3 the index of the 3rd dimension slice. + * \param dim4 the index of the 4th dimension slice. + * \return the corresponding offset. + */ + size_t convert_to_offset(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) const ; + + /*! + * \brief Computes and stores the offsets at which + * each slice on the 1st dimension starts. + */ + void compute_dim1_offsets() ; + + /*! + * \brief Computes and stores the offsets at which + * each slice on the 2nd dimension starts. + */ + void compute_dim2_offsets() ; + + /*! + * \brief Computes and stores the offsets at which + * each slice on the 3rd dimension starts. + */ + void compute_dim3_offsets() ; + + /*! + * \brief Computes and stores the offsets at which + * each slice on the 4th dimension starts. + */ + void compute_dim4_offsets() ; + + /*! + * \brief Contains the offsets at which each dim1 slice + * starts. Each element corresponds to the corresponding + * dim1 slice (1st element -> 1st dim1 slice). + */ + std::vector _dim1_offsets ; + /*! + * \brief Contains the offsets at which each dim2 slice + * starts. Each element corresponds to the corresponding + * y slice (1st element -> 1st dim2 slice). + */ + std::vector _dim2_offsets ; + /*! + * \brief Contains the offsets at which each dim3 slice + * starts. Each element corresponds to the corresponding + * x slice (1st element -> 1st dim3 slice). + */ + std::vector _dim3_offsets ; + /*! + * \brief Contains the offsets at which each dim4 slice + * starts. Each element corresponds to the corresponding + * x slice (1st element -> 1st dim4 slice). + */ + std::vector _dim4_offsets ; + } ; // operators /*! * \brief Addition operator. 
* \param m the matrix of interest * \param value the value to add to each element. * \return the resulting matrix. */ template const Matrix4D operator + (Matrix4D m, T value) { Matrix4D other(m) ; m += value ; return m ; } /*! * \brief Substraction operator * \param m the matrix of interest. * \param value the value to substract to each element. * \return the resulting matrix. */ template const Matrix4D operator - (Matrix4D m, T value) { Matrix4D other(m) ; m -= value ; return m ; } /*! * \brief Multiplication operator. * \param m the matrix of interest. * \param value the value to multiply each elements by. * \return the resulting matrix. */ template const Matrix4D operator * (Matrix4D m, T value) { Matrix4D other(m) ; m *= value ; return m ; } /*! * \brief Division operator. * \param m the matrix of interest. * \param value the value to divide each elements by. * \throw std::invalid_argument if value is 0. * \return the resulting matrix. */ template const Matrix4D operator / (Matrix4D m, T value) { if(value == static_cast(0)) { throw std::invalid_argument("division by 0!") ; } Matrix4D other(m) ; other /= value ; return other ; } /*! * \brief Sends a representation of the matrix to the stream. * \param stream the stream of interest. * \param m the matrix of interest. * \return a reference to the stream. 
*/ template std::ostream& operator << (std::ostream& stream, const Matrix4D& m) { m.print(stream) ; return stream ; } // method implementation template Matrix4D::Matrix4D(size_t dim1, size_t dim2, size_t dim3, size_t dim4) - : Matrix({dim1, dim2, dim3, dim4}, 0) + : Matrix4D(dim1, dim2, dim3, dim4, 0) {} template Matrix4D::Matrix4D(size_t dim1, size_t dim2, size_t dim3, size_t dim4, T value) - : Matrix({dim1, dim2, dim3, dim4}, value) -{} + : Matrix({dim1, dim2, dim3, dim4}, value), + _dim1_offsets(dim1), + _dim2_offsets(dim2), + _dim3_offsets(dim3), + _dim4_offsets(dim4) +{ this->compute_dim1_offsets() ; + this->compute_dim2_offsets() ; + this->compute_dim3_offsets() ; + this->compute_dim4_offsets() ; +} template Matrix4D::Matrix4D(const Matrix4D &other) : Matrix(other) -{} +{ this->_dim1_offsets = other._dim1_offsets ; + this->_dim2_offsets = other._dim2_offsets ; + this->_dim3_offsets = other._dim3_offsets ; + this->_dim4_offsets = other._dim4_offsets ; +} template Matrix4D::Matrix4D(const std::string &file_address) { this->_dim = {0,0,0,0} ; this->_data = std::vector() ; this->_dim_size = this->_dim.size() ; this->_data_size = this->_data.size() ; this->_dim_prod = std::vector(this->_dim_size, 0) ; std::ifstream file(file_address, std::ifstream::in) ; if(file.fail()) { char msg[BUFFER_SIZE] ; sprintf(msg, "error! cannot open %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } std::string buffer_str ; std::vector buffer_t ; std::vector dim ; // read 1st line getline(file, buffer_str) ; // empty line if(buffer_str.size() == 0) { // this file only contains one eol char and should be considered as empty, // -> returns empty matrix not an error if(file.peek() == EOF and file.eof()) { file.close() ; return ; } file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "error! while reading %s (empty line)", file_address.c_str()) ; throw std::runtime_error(msg) ; } if(file.fail()) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "error! 
while reading %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } bool found_4d_header = this->is_header_4d(buffer_str) ; do { if(file.fail()) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "error! while reading %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } // check empty line if(buffer_str.size() == 0) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "error! while reading %s (empty line)", file_address.c_str()) ; throw std::runtime_error(msg) ; } // this is the beginning of a 3D slice -> get it using routine if(found_4d_header) { try { // get slice buffer_t.clear() ; dim.clear() ; found_4d_header = this->get_3d_slice(file_address, file, buffer_t, dim); // update data for(const auto& i : buffer_t) { this->_data.push_back(i) ; this->_data_size++ ; } // update dim only for the 1st slice (the 1st slice set the dimensions) if(this->_dim[3] == 0) { this->_dim[0] = dim[0] ; this->_dim[1] = dim[1] ; this->_dim[2] = dim[2] ; } // check dimensions of the slice else { if(dim[0] != this->_dim[0] or dim[1] != this->_dim[1] or dim[2] != this->_dim[2]) { char msg[BUFFER_SIZE] ; sprintf(msg, "format error! slice have variable dimensions in %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } } this->_dim[3]++ ; } catch(std::runtime_error& e) { file.close() ; throw e ; } } // this is an error, everything between two ',,,N' header // should be read at once. The only way out of the loop // is that no more header has been read because of eof else if(not found_4d_header and not file.eof()) { file.close() ; char msg[BUFFER_SIZE] ; sprintf(msg, "error! 
while reading %s", file_address.c_str()) ; throw std::runtime_error(msg) ; } } while(found_4d_header) ; file.close() ; this->compute_dim_product() ; + + this->_dim1_offsets = std::vector(this->_dim[1]) ; + this->_dim2_offsets = std::vector(this->_dim[0]) ; + this->_dim3_offsets = std::vector(this->_dim[2]) ; + this->_dim4_offsets = std::vector(this->_dim[3]) ; + this->compute_dim1_offsets() ; + this->compute_dim2_offsets() ; + this->compute_dim3_offsets() ; + this->compute_dim4_offsets() ; } template T Matrix4D::get(size_t dim1, size_t dim2, size_t dim3, size_t dim4) const { try { return this->get({dim1, dim2, dim3, dim4}) ; } catch(std::out_of_range& e) { throw e ; } } template void Matrix4D::set(size_t dim1, size_t dim2, size_t dim3, size_t dim4, T value) { try { this->set({dim1, dim2, dim3, dim4}, value) ; } catch(std::out_of_range& e) { throw e ; } } template void Matrix4D::print(std::ostream &stream, size_t precision, size_t width, char sep) const { // if the matrix has at least one 0 dimension (no data), don't do anything if(this->_dim[0]==0 or this->_dim[1]==0 or this->_dim[2]==0 or this->_dim[3]==0) { return ; } stream.setf(std::ios::left) ; stream << std::setprecision(precision) << std::fixed ; std::vector dim = this->get_dim() ; size_t n = 0 ; size_t n_tot = std::accumulate(dim.begin(), dim.end(), 1, std::multiplies()) ; for(size_t dim4=0; dim4 T& Matrix4D::operator () (size_t dim1, size_t dim2, size_t dim3, size_t dim4) -{ std::vector coord = {dim2, dim1, dim3, dim4} ; - return this->_data[this->convert_to_offset(coord)] ; -} +{ return this->_data[this->convert_to_offset(dim1, dim2, dim3, dim4)] ; } template const T& Matrix4D::operator () (size_t dim1, size_t dim2, size_t dim3, size_t dim4) const -{ std::vector coord = {dim2, dim1, dim3, dim4} ; - return this->_data[this->convert_to_offset(coord)] ; -} +{ return this->_data[this->convert_to_offset(dim1, dim2, dim3, dim4)] ; } template bool Matrix4D::is_header_3d(const std::string &str) const { if(str[0] 
== ',' and str[1] == ',' and str.find(',', 2) == std::string::npos) { return true ; } return false ; } template bool Matrix4D::is_header_4d(const std::string &str) const { if(str[0] == ',' and str[1] == ',' and str[2] == ',' and str.find(',', 3) == std::string::npos) { return true ; } return false ; } template bool Matrix4D::get_3d_slice(const std::string& file_name, std::ifstream& file, std::vector &data, std::vector &dim) const { bool found_4d_header = false ; // the flag to return dim = {0,0,0} ; std::string buffer_str ; std::vector buffer_vec ; T buffer_T ; size_t n_line = 0, n_line_data = 0 ; // number of line and of data line read size_t row_len = 0, col_len = 0 ; // length of row and column in nber of values size_t row_len_cur = 0, col_len_cur = 0 ; // current number of values read in row and col while(getline(file, buffer_str)) { if(file.fail()) { char msg[BUFFER_SIZE] ; sprintf(msg, "error! while reading %s", file_name.c_str()) ; throw std::runtime_error(msg) ; } // check empty line if(buffer_str.size() == 0) { char msg[BUFFER_SIZE] ; sprintf(msg, "error! while reading %s (empty line)", file_name.c_str()) ; throw std::runtime_error(msg) ; } // check whether this is the beginning of a 4D slice header, if so // break if(this->is_header_4d(buffer_str)) { found_4d_header = true ; break ; } // check whether it is the beginning of a slice // 1st line in file should be if(this->is_header_3d(buffer_str)) { // check that slice have a constant number of rows if(dim[2] == 1) { col_len = col_len_cur ; // dim[0] = row_len ; // dim[1] = col_len ; } else if(col_len_cur != col_len) { char msg[BUFFER_SIZE] ; sprintf(msg, "format error! slice have variable dimensions in %s", file_name.c_str()) ; throw std::runtime_error(msg) ; } dim[2]++ ; col_len_cur = 0 ; n_line++ ; continue ; } // 1st line in file should be a header and entering // this block is forbidden if(n_line == 0) { char msg[BUFFER_SIZE] ; sprintf(msg, "format error! 
first line is not a slice header in %s", file_name.c_str()) ; throw std::runtime_error(msg) ; } // parse line row_len_cur = 0 ; buffer_vec.clear() ; std::istringstream buffer_ss(buffer_str) ; while(buffer_ss >> buffer_T) { buffer_vec.push_back(buffer_T) ; row_len_cur++ ; } // check for an error which likely indicates that a value could not be // casted into a type T (mixed data types in the file) if(buffer_ss.fail() and not buffer_ss.eof()) { char msg[BUFFER_SIZE] ; sprintf(msg, "format error! could not read a line in %s (incompatible data types)", file_name.c_str()) ; throw std::runtime_error(msg) ; } // check that number of column is constant if(n_line_data == 0) { row_len = row_len_cur ; } else if(row_len_cur != row_len) { char msg[BUFFER_SIZE] ; sprintf(msg, "format error! slice have variable dimensions in %s", file_name.c_str()) ; throw std::runtime_error(msg) ; } // update matrix content for(auto i : buffer_vec) { data.push_back(i) ; } col_len_cur++ ; n_line_data++ ; n_line++ ; // update dimension dim[0] = row_len_cur ; dim[1] = col_len_cur ; } // check dimensions of last slice if(col_len_cur != dim[1]) { char msg[BUFFER_SIZE] ; sprintf(msg, "format error! 
slice have variable dimensions 333 in %s", file_name.c_str()) ; throw std::runtime_error(msg) ; } return found_4d_header ; } +template +void Matrix4D::compute_dim1_offsets() +{ for(size_t i=0; i_dim[1]; i++) + { this->_dim1_offsets[i] = i * this->_dim_prod[1] ; } +} + +template +void Matrix4D::compute_dim2_offsets() +{ for(size_t i=0; i_dim[0]; i++) + { this->_dim2_offsets[i] = i * this->_dim_prod[0] ; } +} + +template +void Matrix4D::compute_dim3_offsets() +{ for(size_t i=0; i_dim[2]; i++) + { this->_dim3_offsets[i] = i * this->_dim_prod[2] ; } +} + +template +void Matrix4D::compute_dim4_offsets() +{ for(size_t i=0; i_dim[3]; i++) + { this->_dim4_offsets[i] = i * this->_dim_prod[3] ; } +} + +template +size_t Matrix4D::convert_to_offset(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) const +{ /* + size_t offset = 0 ; + + for(size_t i=0; i_dim_size; i++) + { offset += coord[i] * this->_dim_prod[i] ; } + + return offset ; + */ + size_t offset = this->_dim1_offsets[dim1] + + this->_dim2_offsets[dim2] + + this->_dim3_offsets[dim3] + + this->_dim4_offsets[dim4] ; + return offset ; +} + #endif // MATRIX4D_HPP diff --git a/src/Random/Random.cpp b/src/Random/Random.cpp index 201bb70..fdc7d02 100755 --- a/src/Random/Random.cpp +++ b/src/Random/Random.cpp @@ -1,30 +1,46 @@ #include "Random.hpp" +#include +#include bool rand_bernoulli(double p) { std::bernoulli_distribution dist(p) ; return dist(getRandomGenerator()) ; } std::vector rand_bernoulli(double p, size_t n) { std::vector vector(n) ; std::bernoulli_distribution dist(p) ; for(size_t i=0; i dist(m, sd) ; return dist(getRandomGenerator()) ; } std::vector rand_normal(double m, double sd, double n) { std::vector vector(n) ; std::normal_distribution dist(m, sd) ; for(size_t i=0; i #include #include #include "RandomNumberGenerator.hpp" /*! * \brief Generates a random number from a * Bernouilli distribution of parameter p. * \param p the probability of success. * \return a random number. 
*/ bool rand_bernoulli(double p) ; /*! * \brief Generates n random number from a * Bernouilli distribution of parameter p. * Not faster than rand_bernoulli(double p) * \param p the probability of success. * \param n the number of values to sample. * \return a vector of n random numbers. */ std::vector rand_bernoulli(double p, size_t n) ; /*! * \brief Generates a random number from a * Normal distribution of mean m and standard * deviation sd. * \param m the mean. * \param sd the standard deviation. * \return a random number. */ double rand_normal(double m, double sd) ; /*! * \brief Generates n random numbers from a * Normal distribution of mean m and standard * deviation sd. * More efficient for sampling than * rand_normal(double m, double sd). * \param m the mean. * \param sd the standard deviation. * \param n the number of values to sample. * \return a vector of n random numbers. */ std::vector rand_normal(double m, double sd, size_t n) ; +/*! + * Generates a random string made of [0-9a-zA-Z] + * characters. + * \param length the length of the string to + * generate. + * \return the generated string. + */ +std::string rand_string(size_t length) ; + /*! Generates a real random number from a uniform * distribution comprised between min and max. * \param min the lower limit of the distribution. * \param max the upper limit of the distribution. * \return a random number. */ template T rand_real_uniform(T min, T max) { std::uniform_real_distribution dist(min, max) ; return dist(getRandomGenerator()) ; } /*! Generates n real random numbers from a uniform * distribution comprised between min and max. * \param min the lower limit of the distribution. * \param max the upper limit of the distribution. * \param n the number of value to sample. * \return a vector of n random number. 
*/ template std::vector rand_real_uniform(T min, T max, size_t n) { assert(n > 0) ; std::vector vector(n) ; std::uniform_real_distribution dist(min, max) ; for(size_t i=0; i T rand_int_uniform(T min, T max) { std::uniform_int_distribution dist(min, max) ; return dist(getRandomGenerator()) ; } /*! Generates n random integers from a uniform * distribution comprised between min and max. * \param min the lower limit of the distribution. * \param max the upper limit of the distribution. * \param n the number of value to sample. * \return a vector of n random number. */ template std::vector rand_int_uniform(T min, T max, size_t n) { assert(n > 0) ; std::vector vector(n) ; std::uniform_int_distribution dist(min, max) ; for(size_t i=0; i #include #include #include -#include +#include #include #include #include #include #include // std::invalid_argument std::string file_bed = "/local/groux/scATAC-seq/data/toy_data/peaks.bed" ; std::string file_bam = "/local/groux/scATAC-seq/data/toy_data/sc_reads.bam" ; std::string file_bai = "/local/groux/scATAC-seq/data/toy_data/sc_reads.bam.bai" ; // GenomeRegion test suite SUITE(GenomeRegion) { // displays message TEST(message) { std::cout << "Starting GenomicTools tests..." 
<< std::endl ; } // tests vonstructor with value TEST(constructor_value) { std::string chr = "chr1" ; int idx = 0 ; GenomeRegion r1(chr, idx, 0, 10) ; CHECK_EQUAL(chr, r1.chromosome) ; CHECK_EQUAL(0, r1.start) ; CHECK_EQUAL(10, r1.end) ; CHECK_EQUAL(10, r1.length) ; GenomeRegion r2(chr, idx, 1, 10) ; CHECK_EQUAL(chr, r2.chromosome) ; CHECK_EQUAL(1, r2.start) ; CHECK_EQUAL(10, r2.end) ; CHECK_EQUAL(9, r2.length) ; CHECK_THROW(GenomeRegion(chr, idx, -1, 10), std::invalid_argument) ; CHECK_THROW(GenomeRegion(chr, idx, 0, -10), std::invalid_argument) ; } // tests constructFragment factory function to create regions from bam /* TEST(test_contructFragment) { // expected content of bam file std::vector regions ; regions.push_back(GenomeRegion("chr1", 400, 480)) ; regions.push_back(GenomeRegion("chr1", 470, 550)) ; regions.push_back(GenomeRegion("chr1", 560, 800)) ; regions.push_back(GenomeRegion("chr1", 560, 640)) ; regions.push_back(GenomeRegion("chr1", 610, 690)) ; regions.push_back(GenomeRegion("chr1", 670, 750)) ; regions.push_back(GenomeRegion("chr1", 730, 810)) ; regions.push_back(GenomeRegion("chr1", 770, 850)) ; regions.push_back(GenomeRegion("chr1", 950, 1150)) ; regions.push_back(GenomeRegion("chr1", 960, 1040)) ; regions.push_back(GenomeRegion("chr1", 1010, 1090)) ; regions.push_back(GenomeRegion("chr1", 1060, 1140)) ; regions.push_back(GenomeRegion("chr1", 1070, 1150)) ; regions.push_back(GenomeRegion("chr1", 1350, 1430)) ; regions.push_back(GenomeRegion("chr1", 1360, 1440)) ; regions.push_back(GenomeRegion("chr1", 1410, 1490)) ; regions.push_back(GenomeRegion("chr1", 1500, 1600)) ; regions.push_back(GenomeRegion("chr1", 1600, 1700)) ; regions.push_back(GenomeRegion("chr2", 400, 480)) ; regions.push_back(GenomeRegion("chr2", 470, 550)) ; regions.push_back(GenomeRegion("chr2", 560, 800)) ; regions.push_back(GenomeRegion("chr2", 560, 640)) ; regions.push_back(GenomeRegion("chr2", 610, 690)) ; regions.push_back(GenomeRegion("chr2", 670, 750)) ; 
regions.push_back(GenomeRegion("chr2", 730, 810)) ; regions.push_back(GenomeRegion("chr2", 770, 850)) ; regions.push_back(GenomeRegion("chr2", 950, 1150)) ; regions.push_back(GenomeRegion("chr2", 960, 1040)) ; regions.push_back(GenomeRegion("chr2", 1010, 1090)) ; regions.push_back(GenomeRegion("chr2", 1060, 1140)) ; regions.push_back(GenomeRegion("chr2", 1070, 1150)) ; regions.push_back(GenomeRegion("chr2", 1350, 1430)) ; regions.push_back(GenomeRegion("chr2", 1360, 1440)) ; regions.push_back(GenomeRegion("chr2", 1410, 1490)) ; regions.push_back(GenomeRegion("chr2", 1500, 1600)) ; regions.push_back(GenomeRegion("chr2", 1600, 1700)) ; seqan::BamAlignmentRecord record ; std::string bam_path = "src/Unittests/data/sc_reads.bam" ; // read file for fragments starting on + strand seqan::BamFileIn bam_file(bam_path.c_str()) ; // header seqan::BamHeader bam_header ; seqan::readHeader(bam_header, bam_file) ; for(size_t i=0; not seqan::atEnd(bam_file); i++) { seqan::readRecord(record, bam_file) ; if(seqan::hasFlagFirst(record) and not seqan::hasFlagRC(record)) { std::cout << regions[i] << " " << GenomeRegion::constructFragment(record) << std::endl ; CHECK_EQUAL(regions[i], GenomeRegion::constructFragment(record)) ; } } seqan::close(bam_file) ; // read file for fragments starting on - strand seqan::BamFileIn bam_file(bam_path.c_str()) ; // header seqan::BamHeader bam_header ; seqan::readHeader(bam_header, bam_file) ; for(size_t i=0; not seqan::atEnd(bam_file); i++) { seqan::readRecord(record, bam_file) ; if(seqan::hasFlagFirst(record) and seqan::hasFlagRC(record)) { CHECK_EQUAL(regions[i], GenomeRegion::constructFragment(record)) ; } } seqan::close(bam_file) ; } */ TEST(test_contructRead) { // expected content of bam file std::list regions_exp ; // chromosome 1 -> has index 0 in BAM file header regions_exp.push_back(GenomeRegion("chr1", 0, 400, 435)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 400, 435)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 445, 480)) ; 
regions_exp.push_back(GenomeRegion("chr1", 0, 445, 480)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 470, 505)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 470, 505)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 515, 550)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 515, 550)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 560, 595)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 560, 595)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 560, 595)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 560, 595)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 605, 640)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 605, 640)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 610, 645)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 610, 645)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 655, 690)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 655, 690)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 670, 705)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 670, 705)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 715, 750)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 715, 750)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 730, 765)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 730, 765)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 765, 800)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 765, 800)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 770, 805)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 770, 805)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 775, 810)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 775, 810)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 815, 850)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 815, 850)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 950, 985)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 950, 985)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 960, 995)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 960, 995)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1005, 
1040)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1005, 1040)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1010, 1045)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1010, 1045)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1055, 1090)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1055, 1090)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1060, 1095)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1060, 1095)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1070, 1105)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1070, 1105)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1105, 1140)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1105, 1140)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1115, 1150)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1115, 1150)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1115, 1150)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1115, 1150)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1350, 1385)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1350, 1385)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1360, 1395)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1360, 1395)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1395, 1430)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1395, 1430)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1405, 1440)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1405, 1440)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1410, 1445)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1410, 1445)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1455, 1490)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1455, 1490)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1500, 1535)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1500, 1535)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1565, 1600)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1565, 1600)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1600, 1635)) ; 
regions_exp.push_back(GenomeRegion("chr1", 0, 1600, 1635)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1665, 1700)) ; regions_exp.push_back(GenomeRegion("chr1", 0, 1665, 1700)) ; // chromosome 2 -> has index 1 in BAM file header regions_exp.push_back(GenomeRegion("chr2", 1, 400, 435)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 400, 435)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 445, 480)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 445, 480)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 470, 505)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 470, 505)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 515, 550)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 515, 550)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 560, 595)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 560, 595)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 560, 595)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 560, 595)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 605, 640)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 605, 640)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 610, 645)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 610, 645)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 655, 690)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 655, 690)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 670, 705)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 670, 705)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 715, 750)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 715, 750)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 730, 765)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 730, 765)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 765, 800)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 765, 800)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 770, 805)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 770, 805)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 775, 810)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 775, 
810)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 815, 850)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 815, 850)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 950, 985)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 950, 985)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 960, 995)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 960, 995)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1005, 1040)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1005, 1040)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1010, 1045)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1010, 1045)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1055, 1090)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1055, 1090)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1060, 1095)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1060, 1095)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1070, 1105)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1070, 1105)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1105, 1140)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1105, 1140)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1115, 1150)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1115, 1150)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1115, 1150)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1115, 1150)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1350, 1385)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1350, 1385)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1360, 1395)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1360, 1395)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1395, 1430)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1395, 1430)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1405, 1440)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1405, 1440)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1410, 1445)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1410, 1445)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1455, 
1490)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1455, 1490)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1500, 1535)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1500, 1535)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1565, 1600)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1565, 1600)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1600, 1635)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1600, 1635)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1665, 1700)) ; regions_exp.push_back(GenomeRegion("chr2", 1, 1665, 1700)) ; // open file seqan::BamFileIn bam_file ; if (!seqan::open(bam_file, file_bam.c_str())) { char msg[4096] ; sprintf(msg, "ERROR: could not open input file %s", file_bam.c_str()) ; } // read file seqan::BamAlignmentRecord record ; seqan::BamHeader header ; seqan::readHeader(header, bam_file) ; std::list regions_val ; while(not seqan::atEnd(bam_file)) { seqan::readRecord(record, bam_file) ; regions_val.push_back(GenomeRegion::constructRead(record, bam_file)) ; } seqan::close(bam_file) ; // compare CHECK_EQUAL(regions_exp.size(), regions_val.size()) ; auto iter_exp = regions_exp.begin() ; auto iter_val = regions_val.begin() ; while(iter_exp != regions_exp.end()) { CHECK_EQUAL(*iter_exp, *iter_val) ; iter_exp++ ; iter_val++ ; } } // tests the method to check overlaps TEST(overlap) { GenomeRegion r1("chr1", 0, 20, 30) ; // reference GenomeRegion r2("chr1", 0, 20, 30) ; // same as reference GenomeRegion r3("chr1", 0, 0, 45) ; // totally contain reference GenomeRegion r4("chr1", 0, 0, 10) ; // no overlap, upstream reference GenomeRegion r5("chr1", 0, 15, 25) ; // partial overlap reference GenomeRegion r6("chr1", 0, 22, 29) ; // inside reference GenomeRegion r7("chr1", 0, 25, 35) ; // partial overlap reference GenomeRegion r8("chr1", 0, 35, 45) ; // no overlap, downstream reference GenomeRegion r9("chr2", 1, 20, 30) ; // diff chromosome // always check reciprocity CHECK_EQUAL(true, r1 | r1) ; CHECK_EQUAL(true, r1 | r2) ; 
CHECK_EQUAL(true, r2 | r1) ; CHECK_EQUAL(true, r1 | r3) ; CHECK_EQUAL(true, r3 | r1) ; CHECK_EQUAL(false, r1 | r4) ; CHECK_EQUAL(false, r4 | r1) ; CHECK_EQUAL(true, r1 | r5) ; CHECK_EQUAL(true, r5 | r1) ; CHECK_EQUAL(true, r1 | r6) ; CHECK_EQUAL(true, r6 | r1) ; CHECK_EQUAL(true, r1 | r7) ; CHECK_EQUAL(true, r7 | r1) ; CHECK_EQUAL(false, r1 | r8) ; CHECK_EQUAL(false, r8 | r1) ; CHECK_EQUAL(false, r1 | r9) ; CHECK_EQUAL(false, r9 | r1) ; } // tests the methods to get overlap length TEST(overlap_len) { GenomeRegion r1("chr1", 0, 10, 20) ; // reference GenomeRegion r2("chr1", 0, 10, 20) ; // same as reference GenomeRegion r3("chr1", 0, 0, 45) ; // totally contain reference GenomeRegion r4("chr2", 1, 10, 20) ; // diff chromosome // always check reciprocity CHECK_EQUAL(10, r1.overlap_len(r1)) ; CHECK_EQUAL(10, r1.overlap_len(r2)) ; CHECK_EQUAL(10, r1.overlap_len(r2)) ; CHECK_EQUAL(10, r1.overlap_len(r3)) ; CHECK_EQUAL(10, r1.overlap_len(r3)) ; CHECK_EQUAL(0, r1.overlap_len(r4)) ; CHECK_EQUAL(0, r1.overlap_len(r4)) ; // slide a smaller region along reference, from before to after std::vector overlaps = {0,0,1,2,3,4,4,4,4,4,4,4,3,2,1,0,0,0} ; int len = 4 ; for(int i=0, start=5; start<23; i++, start++) { int end = start + len ; GenomeRegion s1("chr1", 0, start, end) ; CHECK_EQUAL(overlaps[i], r1.overlap_len(s1)) ; CHECK_EQUAL(overlaps[i], s1.overlap_len(r1)) ; } } // tests the is upstream and is downstream operators TEST(upstream_downstream) { GenomeRegion r1("chr1", 0, 10, 20) ; // reference GenomeRegion r2("chr1", 0, 10, 20) ; // same as reference GenomeRegion r3("chr1", 0, 0, 45) ; // totally contain reference GenomeRegion r4("chr2", 1, 10, 20) ; // diff chromosome (downstream has 0 < 1) // always check reciprocity CHECK_EQUAL(false, r1 < r1) ; CHECK_EQUAL(false, r1 > r1) ; CHECK_EQUAL(false, r1 < r2) ; CHECK_EQUAL(false, r1 > r2) ; CHECK_EQUAL(false, r1 < r3) ; CHECK_EQUAL(false, r1 < r3) ; CHECK_EQUAL(false, r3 < r1) ; CHECK_EQUAL(false, r3 < r1) ; // not on the same 
chromosome -> depends on the index value CHECK_EQUAL(r1 < r4, true) ; CHECK_EQUAL(r1 > r4, false) ; CHECK_EQUAL(r4 < r1, false) ; CHECK_EQUAL(r4 > r1, true) ; // slide a smaller region along reference, from before to after std::vector s1_upstream = {1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} ; // s1 < r1 std::vector r1_downstream = {1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} ; // r1 > s1 std::vector s1_downstream = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1} ; // s1 > r1 std::vector r1_upstream = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1} ; // r1 < s1 int len = 4 ; for(int i=0, start=5; start<23; i++, start++) { // the sliding one int end = start + len ; GenomeRegion s1("chr1", 0, start, end) ; CHECK_EQUAL(s1_upstream[i], s1 < r1) ; CHECK_EQUAL(r1_downstream[i], r1 > s1) ; CHECK_EQUAL(r1_upstream[i], r1 < s1) ; CHECK_EQUAL(s1_downstream[i], s1 > r1) ; } } } // CorrelationMatrixCreator test suite SUITE(CorrelationMatrixCreator) { // displays message TEST(message) { std::cout << "Starting CorrelationMatrixCreator tests..." 
<< std::endl ; } // tests matrix creation with full fragments TEST(create_matrix_fragment) { CorrelationMatrixCreator creator(file_bed, file_bam, file_bai, -500, 500, 100, CorrelationMatrixCreator::FRAGMENT) ; - matrix2d_i m_val = creator.create_matrix() ; - matrix2d_i m_exp(2, - vector_i(9, 0)) ; - m_exp[0][0] = 420 ; m_exp[0][1] = 480 ; m_exp[0][2] = 380 ; - m_exp[0][3] = 0 ; m_exp[0][4] = 440 ; m_exp[0][5] = 600 ; - m_exp[0][6] = 0 ; m_exp[0][7] = 0 ; m_exp[0][8] = 400 ; - - m_exp[1][0] = 420 ; m_exp[1][1] = 480 ; m_exp[1][2] = 380 ; - m_exp[1][3] = 0 ; m_exp[1][4] = 440 ; m_exp[1][5] = 600 ; - m_exp[1][6] = 0 ; m_exp[1][7] = 0 ; m_exp[1][8] = 400 ; - - CHECK_EQUAL(m_exp.size(), m_val.size()) ; - CHECK_EQUAL(m_exp[0].size(), m_val[0].size()) ; - - for(size_t i=0; i m_val = creator.create_matrix() ; + Matrix2D m_exp(2, 9, 0) ; + m_exp(0,0) = 420 ; m_exp(0,1) = 480 ; m_exp(0,2) = 380 ; + m_exp(0,3) = 0 ; m_exp(0,4) = 440 ; m_exp(0,5) = 600 ; + m_exp(0,6) = 0 ; m_exp(0,7) = 0 ; m_exp(0,8) = 400 ; + + m_exp(1,0) = 420 ; m_exp(1,1) = 480 ; m_exp(1,2) = 380 ; + m_exp(1,3) = 0 ; m_exp(1,4) = 440 ; m_exp(1,5) = 600 ; + m_exp(1,6) = 0 ; m_exp(1,7) = 0 ; m_exp(1,8) = 400 ; + + CHECK_EQUAL(m_exp.get_nrow(), m_val.get_nrow()) ; + CHECK_EQUAL(m_exp.get_ncol(), m_val.get_ncol()) ; + + for(size_t i=0; i m_val = creator.create_matrix() ; + Matrix2D m_exp(2, 9, 0) ; + m_exp(0,0) = 2 ; m_exp(0,1) = 6 ; m_exp(0,2) = 4 ; + m_exp(0,3) = 0 ; m_exp(0,4) = 2 ; m_exp(0,5) = 8 ; + m_exp(0,6) = 0 ; m_exp(0,7) = 0 ; m_exp(0,8) = 4 ; + + m_exp(1,0) = 2 ; m_exp(1,1) = 6 ; m_exp(1,2) = 4 ; + m_exp(1,3) = 0 ; m_exp(1,4) = 2 ; m_exp(1,5) = 8 ; + m_exp(1,6) = 0 ; m_exp(1,7) = 0 ; m_exp(1,8) = 4 ; + + CHECK_EQUAL(m_exp.get_nrow(), m_val.get_nrow()) ; + CHECK_EQUAL(m_exp.get_ncol(), m_val.get_ncol()) ; + + for(size_t i=0; i m_val = creator.create_matrix() ; + Matrix2D m_exp(2, 9, 0) ; + m_exp(0,0) = 280 ; m_exp(0,1) = 250 ; m_exp(0,2) = 310 ; + m_exp(0,3) = 0 ; m_exp(0,4) = 280 ; m_exp(0,5) = 420 
; + m_exp(0,6) = 0 ; m_exp(0,7) = 0 ; m_exp(0,8) = 350 ; + + m_exp(1,0) = 280 ; m_exp(1,1) = 250 ; m_exp(1,2) = 310 ; + m_exp(1,3) = 0 ; m_exp(1,4) = 280 ; m_exp(1,5) = 420 ; + m_exp(1,6) = 0 ; m_exp(1,7) = 0 ; m_exp(1,8) = 350 ; + + CHECK_EQUAL(m_exp.get_nrow(), m_val.get_nrow()) ; + CHECK_EQUAL(m_exp.get_ncol(), m_val.get_ncol()) ; + + for(size_t i=0; i m_val = creator.create_matrix() ; + Matrix2D m_exp(2, 9, 0) ; + m_exp(0,0) = 8 ; m_exp(0,1) = 8 ; m_exp(0,2) = 8 ; + m_exp(0,3) = 0 ; m_exp(0,4) = 8 ; m_exp(0,5) = 12 ; + m_exp(0,6) = 0 ; m_exp(0,7) = 0 ; m_exp(0,8) = 10 ; + + m_exp(1,0) = 8 ; m_exp(1,1) = 8 ; m_exp(1,2) = 8 ; + m_exp(1,3) = 0 ; m_exp(1,4) = 8 ; m_exp(1,5) = 12 ; + m_exp(1,6) = 0 ; m_exp(1,7) = 0 ; m_exp(1,8) = 10 ; + + CHECK_EQUAL(m_exp.get_nrow(), m_val.get_nrow()) ; + CHECK_EQUAL(m_exp.get_ncol(), m_val.get_ncol()) ; + + for(size_t i=0; i #include // accumulate() #include #include #include #include /*! * \brief Given a matrix and an offset, this methods converts * the offset into a coordinates vector (row, col, ...). It is * a simple copy/paste of Matrix::convert_to_coord() which is * private. * \param m a matrix. * \param offset an offset * \return a vector of coordinates (row,col,...) corresponding to * the offset for the given matrix. */ std::vector convert_to_coord(const Matrix& m, size_t offset) { std::vector dim = m.get_dim() ; // (row, col, ...) format if(dim.size() > 1) { std::swap(dim[0], dim[1]) ; } // (x,y,...) format std::vector coord(dim.size(), 0) ; std::vector dim_prod(dim.size(), 0) ; dim_prod[0] = 1 ; if(dim.size() > 1) { dim_prod[1] = dim[0] ; } if(dim.size() > 2) { for(size_t i=2; i=0; i--) { size_t c = offset / dim_prod[i] ; coord[i] = c ; offset -= (dim_prod[i]*c) ; } if(dim.size() > 1) { std::swap(coord[0], coord[1]) ; } // (row,col,...) format return coord ; } -/* // Matrix test suite SUITE(Matrix) { // displays message TEST(message) { std::cout << "Starting Matrix tests..." 
<< std::endl ; } // tests normal constructor TEST(constructor) { std::vector dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; CHECK_EQUAL(dim_1.size(), m1.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim_1, m1.get_dim(), dim_1.size()) ; CHECK_EQUAL(data_size_1, m1.get_data_size()) ; // always has a zero dimension : 0 / 0x1 / 0x1x2/ ... / 0x1x...x10 Matrix m2(dim_2) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; CHECK_EQUAL(dim_2.size(), m2.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim_2, m2.get_dim(), dim_2.size()) ; CHECK_EQUAL(data_size_2, m2.get_data_size()) ; CHECK_EQUAL(data_size_2, m2.get_data().size()) ; // is a 0 dimension matrix : 0 / 0x0 / 0x0x...x0 Matrix m3(dim_3) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; CHECK_EQUAL(dim_3.size(), m3.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim_3, m3.get_dim(), dim_3.size()) ; CHECK_EQUAL(data_size_3, m3.get_data_size()) ; CHECK_EQUAL(data_size_3, m3.get_data().size()) ; } } // tests contructor with value TEST(constructor_value) { std::vector dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; CHECK_EQUAL(dim_1.size(), m1.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim_1, m1.get_dim(), dim_1.size()) ; CHECK_EQUAL(data_size_1, m1.get_data_size()) ; for(const auto x : m1.get_data()) { CHECK_EQUAL(i, x) ; } // always has a zero dimension : 0 / 0x1 / 0x1x2/ ... 
/ 0x1x...x10 Matrix m2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; CHECK_EQUAL(dim_2.size(), m2.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim_2, m2.get_dim(), dim_2.size()) ; CHECK_EQUAL(data_size_2, m2.get_data_size()) ; CHECK_EQUAL(data_size_2, m2.get_data().size()) ; for(const auto x : m2.get_data()) { CHECK_EQUAL(i, x) ; } // is a 0 dimension matrix : 0 / 0x0 / 0x0x...x0 Matrix m3(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; CHECK_EQUAL(dim_3.size(), m3.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim_3, m3.get_dim(), dim_3.size()) ; CHECK_EQUAL(data_size_3, m3.get_data_size()) ; CHECK_EQUAL(data_size_3, m3.get_data().size()) ; for(const auto x : m3.get_data()) { CHECK_EQUAL(i, x) ; } } } // tests the get() method, compare a value get with offset with the value get with coordinates // (computed from offset) TEST(get) { std::vector dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... 
/ 1x2x...x11 Matrix m1(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... 
/ 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... 
/ 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... 
/ 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; Matrix m1_2(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m1_2(m1) ; CHECK_EQUAL(true, m1 == m1_2) ; // always has a zero dimension : 0 / 0x1 / 0x1x2/ ... 
/ 0x1x...x10 Matrix m2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m2_2(m2) ; CHECK_EQUAL(true, m2 == m2_2) ; // is a 0 dimension matrix : 0 / 0x0 / 0x0x...x0 Matrix m3(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j m3_2(m3) ; CHECK_EQUAL(true, m3 == m3_2) ; } } // tests the () operator TEST(parenthesis_operator) { std::vector dim_1, dim_2, dim_3 ; size_t data_size_1, data_size_2, data_size_3 ; // from 0D to 10D for(size_t i=1; i<11; i++) { dim_1.push_back(i+1) ; dim_2.push_back(i) ; dim_3.push_back(0) ; // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 Matrix m1(dim_1, i) ; data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; for(size_t j=0; j m2(dim_2, i) ; Matrix m2_2(dim_2, i) ; data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; for(size_t j=0; j m3(dim_3, i) ; Matrix m3_2(dim_3, i) ; data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; for(size_t j=0; j dim = {i,j} ; Matrix2D m(i,j) ; CHECK_EQUAL(dim.size(), m.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), m.get_data_size()) ; } } } // tests contructor with value TEST(constructor_value) { int n = 999 ; for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { std::vector dim = {i,j} ; Matrix2D m(i,j,n) ; CHECK_EQUAL(dim.size(), m.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), m.get_data_size()) ; for(const auto& i : m.get_data()) { CHECK_EQUAL(n, i) ; } } } } // tests the copy constructor TEST(constructor_copy) { for(size_t i=1; i<11; i++) { std::vector dim ; // has non-0 dimensions : 1x2 / 2x3 / ... 
dim = {i, i+1} ; Matrix2D m1(i,i+1) ; for(size_t j=0; j m1_2(m1) ; CHECK_EQUAL(true, m1 == m1_2) ; // always has a zero dimension : // has a zero dimension : 0x1 / 0x2 / ... dim = {0, i} ; Matrix2D m2(0,i) ; for(size_t j=0; j m2_2(m2) ; CHECK_EQUAL(true, m2 == m2_2) ; // is a 0 dimension matrix : 0x0 dim = {0, 0} ; Matrix2D m3(0,0) ; for(size_t j=0; j m3_2(m3) ; CHECK_EQUAL(true, m3 == m3_2) ; } } // tests the get() method, compare a value get with offset with the value get with coordinates // (computed from offset) TEST(get) { for(size_t i=1; i<11; i++) { std::vector dim ; // has non-0 dimensions : 1x2 / 2x3 / ... Matrix2D m1(i,i+1, i) ; dim = {i,i+1} ; for(size_t j=0; j coord = convert_to_coord(m1, j) ; CHECK_EQUAL(m1.get(j), m1.get(coord[0], coord[1])) ; } // has a zero dimension : 0x1 / 0x2 / ... Matrix2D m2(0,i,i) ; dim = {0,i} ; for(size_t j=0; j coord = convert_to_coord(m2, j) ; CHECK_EQUAL(m2.get(j), m2.get(coord[0], coord[1])) ; } // has zero dimensions : 0x0 Matrix2D m3(0,0,i) ; dim = {0,0} ; for(size_t j=0; j coord = convert_to_coord(m3, j) ; CHECK_EQUAL(m3.get(j), m3.get(coord[0], coord[1])) ; } } } // test the set() method, set a value and then check it using get() TEST(set) { for(size_t i=1; i<11; i++) { std::vector dim ; // has non-0 dimensions : 1x2 / 2x3 / ... Matrix2D m1(i,i+1, i) ; dim = {i,i+1} ; for(size_t j=0; j coord = convert_to_coord(m1, j) ; m1.set(coord[0], coord[1], j) ; } for(size_t j=0; j m2(0,i,i) ; dim = {0,i} ; for(size_t j=0; j coord = convert_to_coord(m2, j) ; m2.set(coord[0], coord[1], j) ; } for(size_t j=0; j m3(0,0,i) ; dim = {0,0} ; for(size_t j=0; j coord = convert_to_coord(m3, j) ; m3.set(coord[0], coord[1], j) ; } for(size_t j=0; j m1(i,i+1) ; CHECK_EQUAL(i, m1.get_nrow()) ; // always has a zero dimension : // has a zero dimension : 0x1 / 0x2 / ... 
Matrix2D m2(0,i) ; CHECK_EQUAL(0, m2.get_nrow()) ; // is a 0 dimension matrix : 0x0 Matrix2D m3(0,0) ; CHECK_EQUAL(0, m3.get_nrow()) ; } } // tests get_ncol() TEST(get_ncol) { for(size_t i=1; i<11; i++) { // has non-0 dimensions : 1x2 / 2x3 / ... Matrix2D m1(i,i+1) ; CHECK_EQUAL(i+1, m1.get_ncol()) ; // always has a zero dimension : // has a zero dimension : 0x1 / 0x2 / ... Matrix2D m2(0,i) ; CHECK_EQUAL(i, m2.get_ncol()) ; // is a 0 dimension matrix : 0x0 Matrix2D m3(0,0) ; CHECK_EQUAL(0, m3.get_ncol()) ; } } // tests get_row() TEST(get_row) { for(size_t i=0; i<11; i++) { Matrix2D m(5,i) ; for(size_t j=0; j row(m.get_ncol()) ; for(size_t n=0, k=j*m.get_ncol(); n m(i,5) ; for(size_t j=0; j col(m.get_nrow()) ; for(size_t n=0, k=j; n m(5,i) ; for(size_t j=0; j new_row(i, 999) ; m.set_row(j, new_row) ; CHECK_EQUAL(i, m.get_row(j).size()) ; CHECK_ARRAY_EQUAL(new_row, m.get_row(j), new_row.size()) ; } CHECK_THROW(m.set_row(9999, std::vector(i,0)), std::out_of_range) ; CHECK_THROW(m.set_row(0, std::vector(i+1,0)), std::invalid_argument) ; } } // tests set_col() TEST(set_col) { for(size_t i=0; i<11; i++) { Matrix2D m(i,5) ; for(size_t j=0; j new_col(i, 999) ; m.set_col(j, new_col) ; CHECK_EQUAL(i, m.get_col(j).size()) ; CHECK_ARRAY_EQUAL(new_col, m.get_col(j), new_col.size()) ; } CHECK_THROW(m.set_col(9999, std::vector(i,0)), std::out_of_range) ; CHECK_THROW(m.set_col(0, std::vector(i+1,0)), std::invalid_argument) ; } } TEST(parenthesis_operator) { for(size_t i=1; i<11; i++) { std::vector dim ; // has non-0 dimensions : 1x2 / 2x3 / ... 
Matrix2D m1(i,i+1, i) ; dim = {i,i+1} ; for(size_t j=0; j coord = convert_to_coord(m1, j) ; m1(coord[0], coord[1]) = j ; } for(size_t j=0; j m2(0,i,i) ; dim = {0,i} ; for(size_t j=0; j coord = convert_to_coord(m2, j) ; m2(coord[0], coord[1]) = j ; } for(size_t j=0; j m3(0,0,i) ; dim = {0,0} ; for(size_t j=0; j coord = convert_to_coord(m3, j) ; m3(coord[0], coord[1]) = j ; } for(size_t j=0; j> v_int({{0,1,2,3},{4,5,6,7}}) ; std::vector> v_char({{'A','A','A'},{'C','C','C'}, {'G','G','G'},{'T','T','T'}}) ; std::vector> v_double({{0.,1.,2.,3.},{4.,5.,6.,7.}}) ; Matrix2D m_int(2,4) ; m_int.set_row(0, {0,1,2,3}) ; m_int.set_row(1, {4,5,6,7}) ; Matrix2D m_char(4,3) ; m_char.set_row(0, {'A','A','A'}) ; m_char.set_row(1, {'C','C','C'}) ; m_char.set_row(2, {'G','G','G'}) ; m_char.set_row(3, {'T','T','T'}) ; Matrix2D m_dbl(2,4) ; m_dbl.set_row(0, {0.,1.,2.,3.}) ; m_dbl.set_row(1, {4.,5.,6.,7.}) ; // matrix of int Matrix2D m_int1(file_int1) ; // this one is perfect Matrix2D m_int2(file_int2) ; // this one has inhomogeneous spaceers but is OK CHECK_EQUAL(m_int, m_int1) ; CHECK_EQUAL(m_int, m_int2) ; // matrix with only 1 int Matrix2D m_int3(file_int7) ; CHECK_EQUAL( Matrix2D(1,1,1), m_int3) ; // empty matrix (empty file) Matrix2D m_int4(file_int8) ; CHECK_EQUAL(Matrix2D(0,0), m_int4) ; // empty matrix (only eol in file) Matrix2D m_int5(file_int9) ; CHECK_EQUAL(Matrix2D(0,0), m_int5) ; // these files are not well formatted CHECK_THROW(m_int2 = Matrix2D(file_int3), std::runtime_error) ; // data are inhomogeneous CHECK_THROW(m_int2 = Matrix2D(file_int4), std::runtime_error) ; // empty line CHECK_THROW(m_int2 = Matrix2D(file_int5), std::runtime_error) ; // empty line CHECK_THROW(m_int2 = Matrix2D(file_int6), std::runtime_error) ; // empty line // matrix of char Matrix2D m_char1(file_char1) ; CHECK_EQUAL(m_char, m_char1) ; // matrix of double Matrix2D m_dbl1(file_double1) ; CHECK_EQUAL(m_dbl, m_dbl1) ; // file does not exist CHECK_THROW(Matrix2D m_int2(file_ghost), 
std::runtime_error) ; } // tests file format, writting a matrix and reading it should return the // same matrix, uses set() and the == operator // loading an empty file is not allowed (has no meaning, the file is empty) TEST(file_format) { for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { Matrix2D m(i,j) ; for(size_t a=0; a m2("./src/Unittests/data/matrix2d_out.mat") ; // any matrix with at least one zero dimension is a null // matrix if(i==0 or j==0) { CHECK_EQUAL(Matrix2D(0,0), m2) ; } else { CHECK_EQUAL(m, m2) ; } } } } } SUITE(Matrix3D) { // displays message TEST(message) { std::cout << "Starting Matrix3D tests..." << std::endl ; } // tests constructor TEST(constructor) { for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { std::vector dim = {i,j,k} ; Matrix3D m(i,j,k) ; CHECK_EQUAL(dim.size(), m.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), m.get_data_size()) ; } } } } // test constructor value TEST(constructor_value) { int n = 999 ; for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { std::vector dim = {i,j,k} ; Matrix3D m(i,j,k,n) ; CHECK_EQUAL(dim.size(), m.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), m.get_data_size()) ; for(const auto& i : m.get_data()) { CHECK_EQUAL(n, i) ; } } } } } // tests copy constructor TEST(constructor_copy) { int n = 999 ; for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { std::vector dim = {i,j,k} ; Matrix3D m(i,j,k,n) ; Matrix3D m2(m) ; CHECK_EQUAL(m, m2) ; } } } } // tests contructor from file, uses the == operator TEST(constructor_file) { std::string file_int1("./src/Unittests/data/matrix3d_int1.mat") ; std::string file_int2("./src/Unittests/data/matrix3d_int2.mat") ; std::string 
file_int3("./src/Unittests/data/matrix3d_int3.mat") ; std::string file_int4("./src/Unittests/data/matrix3d_int4.mat") ; std::string file_int5("./src/Unittests/data/matrix3d_int5.mat") ; std::string file_int6("./src/Unittests/data/matrix3d_int6.mat") ; std::string file_int7("./src/Unittests/data/matrix3d_int7.mat") ; std::string file_int8("./src/Unittests/data/matrix3d_int8.mat") ; std::string file_int9("./src/Unittests/data/matrix3d_int9.mat") ; std::string file_int10("./src/Unittests/data/matrix3d_int10.mat") ; std::string file_int11("./src/Unittests/data/matrix3d_int11.mat") ; std::string file_int12("./src/Unittests/data/matrix3d_int12.mat") ; std::string file_int13("./src/Unittests/data/matrix3d_int13.mat") ; std::string file_int14("./src/Unittests/data/matrix3d_int14.mat") ; std::string file_double("./src/Unittests/data/matrix3d_double.mat") ; std::string file_ghost("./src/Unittests/data/foo.mat") ; std::vector v_int = {-1,0,2,0, 0,3,0,4, 0,0,0,0, 0,0,0,0, 0,5,-6,0, 0,7,0,0} ; std::vector v_int2 = {1} ; std::vector v_dbl = {-1.,0., 2.,0., 0.,3., 0.,4., 0.,0., 0.,0., 0.,0., 0.,0., 0.,5.,-6.,0., 0.,7., 0.,0.} ; std::vector dim = {2,4,3} ; std::vector dim2 = {1,1,1} ; // matrix of int Matrix3D m_int(file_int1) ; CHECK_EQUAL(dim.size(), m_int.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m_int.get_dim(), dim.size()) ; CHECK_EQUAL(v_int.size(), m_int.get_data_size()) ; CHECK_ARRAY_EQUAL(v_int, m_int.get_data(), v_int.size()) ; // matrix with only 1 int Matrix3D m_int2(file_int12) ; CHECK_EQUAL(Matrix3D(1,1,1,1), m_int2) ; // empty matrix (empty file) Matrix3D m_int3(file_int13) ; CHECK_EQUAL(Matrix3D(0,0,0), m_int3) ; // empty matrix (only eol in file) Matrix3D m_int4(file_int13) ; CHECK_EQUAL(Matrix3D(0,0,0), m_int4) ; // these files are not well formatted CHECK_THROW(Matrix3D m_int3(file_int2), std::runtime_error) ; // mixed data types CHECK_THROW(Matrix3D m_int3(file_int3), std::runtime_error) ; // slice of variable dim CHECK_THROW(Matrix3D m_int3(file_int4), 
std::runtime_error) ; // slice of variable dim CHECK_THROW(Matrix3D m_int3(file_int5), std::runtime_error) ; // slice of variable dim CHECK_THROW(Matrix3D m_int3(file_int6), std::runtime_error) ; // empty line CHECK_THROW(Matrix3D m_int3(file_int7), std::runtime_error) ; // empty line CHECK_THROW(Matrix3D m_int3(file_int8), std::runtime_error) ; // empty line CHECK_THROW(Matrix3D m_int3(file_int9), std::runtime_error) ; // empty line CHECK_THROW(Matrix3D m_int3(file_int10), std::runtime_error) ; // empty line CHECK_THROW(Matrix3D m_int3(file_int11), std::runtime_error) ; // empty line // this file does not exist CHECK_THROW(Matrix3D m_int3(file_ghost), std::runtime_error) ; // matrix of double Matrix3D m_double(file_double) ; CHECK_EQUAL(dim.size(), m_double.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m_double.get_dim(), dim.size()) ; CHECK_EQUAL(v_int.size(), m_double.get_data_size()) ; CHECK_ARRAY_EQUAL(v_int, m_double.get_data(), v_int.size()) ; } // tests get() TEST(get) { int n = 999 ; for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { std::vector dim = {i,j,k} ; Matrix3D m(i,j,k,n) ; for(size_t l=0; l coord = convert_to_coord(m, l) ; CHECK_EQUAL(m.get(l), m.get(coord[0], coord[1], coord[2])) ; } } } } } // tests set() TEST(set) { int n = 999 ; for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { std::vector dim = {i,j,k} ; Matrix3D m(i,j,k,n) ; for(size_t l=0; l coord = convert_to_coord(m, l) ; m.set(coord[0], coord[1], coord[2], l) ; } for(size_t l=0; l dim = {i,j,k} ; Matrix3D m(i,j,k,n) ; for(size_t l=0; l coord = convert_to_coord(m, l) ; m(coord[0], coord[1], coord[2]) = l ; } for(size_t l=0; l m(i,j,k) ; for(size_t a=0; a m2("./src/Unittests/data/matrix3d_out.mat") ; // any matrix with at least one zero dimension is a null // matrix if(i==0 or j==0 or k==0) { CHECK_EQUAL(Matrix3D(0,0,0), m2) ; } else { CHECK_EQUAL(m, m2) ; } } } } } } SUITE(Matrix4D) { // displays message TEST(message) { 
std::cout << "Starting Matrix4D tests..." << std::endl ; } // constructor TEST(constructor) { for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { for(size_t l=0; l<10; l++) { std::vector dim = {i,j,k,l} ; Matrix4D m(i,j,k,l) ; CHECK_EQUAL(dim.size(), m.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), m.get_data_size()) ; } } } } } // test constructor value TEST(constructor_value) { int n = 999 ; for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { for(size_t l=0; l<10; l++) { std::vector dim = {i,j,k,l} ; Matrix4D m(i,j,k,l,n) ; CHECK_EQUAL(dim.size(), m.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), m.get_data_size()) ; for(const auto& i : m.get_data()) { CHECK_EQUAL(n, i) ; } } } } } } // tests copy constructor TEST(constructor_copy) { int n = 999 ; for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { for(size_t l=0; l<10; l++) { std::vector dim = {i,j,k,l} ; Matrix4D m(i,j,k,l,n) ; Matrix4D m2(m) ; CHECK_EQUAL(m, m2) ; } } } } } // tests contructor from file, uses the == operator TEST(constructor_file) { std::string file_int1("./src/Unittests/data/matrix4d_int1.mat") ; std::string file_int2("./src/Unittests/data/matrix4d_int2.mat") ; std::string file_int3("./src/Unittests/data/matrix4d_int3.mat") ; std::string file_int4("./src/Unittests/data/matrix4d_int4.mat") ; std::string file_int5("./src/Unittests/data/matrix4d_int5.mat") ; std::string file_int6("./src/Unittests/data/matrix4d_int6.mat") ; std::string file_int7("./src/Unittests/data/matrix4d_int7.mat") ; std::string file_int8("./src/Unittests/data/matrix4d_int8.mat") ; std::string file_int9("./src/Unittests/data/matrix4d_int9.mat") ; std::string file_int10("./src/Unittests/data/matrix4d_int10.mat") ; std::string 
file_int11("./src/Unittests/data/matrix4d_int11.mat") ; std::string file_int12("./src/Unittests/data/matrix4d_int12.mat") ; std::string file_int13("./src/Unittests/data/matrix4d_int13.mat") ; std::string file_int14("./src/Unittests/data/matrix4d_int14.mat") ; std::string file_int15("./src/Unittests/data/matrix4d_int15.mat") ; std::string file_int16("./src/Unittests/data/matrix4d_int16.mat") ; std::string file_int17("./src/Unittests/data/matrix4d_int17.mat") ; std::string file_int18("./src/Unittests/data/matrix4d_int18.mat") ; std::string file_int19("./src/Unittests/data/matrix4d_int19.mat") ; std::string file_int20("./src/Unittests/data/matrix4d_int20.mat") ; std::string file_dbl1("./src/Unittests/data/matrix4d_double1.mat") ; std::string file_ghost("./src/Unittests/data/foo.mat") ; std::vector v_int = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12, 13,14,15, 16,17,18, 19,20,21, 22,23,24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12, 13,14,15, 16,17,18, 19,20,21, 22,23,24} ; std::vector v_dbl = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12, 13,14,15, 16,17,18, 19,20,21, 22,23,24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12, 13,14,15, 16,17,18, 19,20,21, 22,23,24} ; std::vector dim = {2,3,2,4} ; // matrix of int Matrix4D m_int(file_int1) ; CHECK_EQUAL(dim.size(), m_int.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m_int.get_dim(), dim.size()) ; CHECK_EQUAL(v_int.size(), m_int.get_data_size()) ; CHECK_ARRAY_EQUAL(v_int, m_int.get_data(), v_int.size()) ; // matrix with only 1 int Matrix4D m_int2(file_int18) ; CHECK_EQUAL(Matrix4D(1,1,1,1,1), m_int2) ; // empty matrix (empty file) Matrix4D m_int3(file_int19) ; CHECK_EQUAL(Matrix4D(0,0,0,0), m_int3) ; // empty matrix (only eol in file) Matrix4D m_int4(file_int20) ; CHECK_EQUAL(Matrix4D(0,0,0,0), m_int3) ; // these files are not well formatted CHECK_THROW(Matrix4D m_int5(file_int2), std::runtime_error) ; // empty lines CHECK_THROW(Matrix4D m_int5(file_int3), std::runtime_error) ; // empty lines CHECK_THROW(Matrix4D m_int5(file_int4), std::runtime_error) ; // 
empty lines CHECK_THROW(Matrix4D m_int5(file_int5), std::runtime_error) ; // empty lines CHECK_THROW(Matrix4D m_int5(file_int6), std::runtime_error) ; // empty lines CHECK_THROW(Matrix4D m_int5(file_int7), std::runtime_error) ; // first line problem CHECK_THROW(Matrix4D m_int5(file_int8), std::runtime_error) ; // first line problem CHECK_THROW(Matrix4D m_int5(file_int9), std::runtime_error) ; // first line problem CHECK_THROW(Matrix4D m_int5(file_int10), std::runtime_error) ; // second line problem CHECK_THROW(Matrix4D m_int5(file_int11), std::runtime_error) ; // extra column CHECK_THROW(Matrix4D m_int5(file_int12), std::runtime_error) ; // missing column CHECK_THROW(Matrix4D m_int5(file_int13), std::runtime_error) ; // extra row CHECK_THROW(Matrix4D m_int5(file_int14), std::runtime_error) ; // extra 2d slice CHECK_THROW(Matrix4D m_int5(file_int15), std::runtime_error) ; // extra 2d slice CHECK_THROW(Matrix4D m_int5(file_int16), std::runtime_error) ; // last line problem CHECK_THROW(Matrix4D m_int5(file_int17), std::runtime_error) ; // mixded data types // this file does not exist CHECK_THROW(Matrix4D m_int3(file_ghost), std::runtime_error) ; // matrix of double Matrix4D m_dbl(file_dbl1) ; CHECK_EQUAL(dim.size(), m_dbl.get_dim_size()) ; CHECK_ARRAY_EQUAL(dim, m_dbl.get_dim(), dim.size()) ; CHECK_EQUAL(v_dbl.size(), m_dbl.get_data_size()) ; CHECK_ARRAY_EQUAL(v_dbl, m_dbl.get_data(), v_dbl.size()) ; } // tests get() TEST(get) { int n = 999 ; for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { for(size_t l=0; l<10; l++) { std::vector dim = {i,j,k,l} ; Matrix4D m(i,j,k,l,n) ; for(size_t a=0; a coord = convert_to_coord(m, a) ; CHECK_EQUAL(m.get(a), m.get(coord[0], coord[1], coord[2], coord[3])) ; } } } } } } // tests set() TEST(set) { int n = 999 ; for(size_t i=0; i<10; i++) { for(size_t j=0; j<10; j++) { for(size_t k=0; k<10; k++) { for(size_t l=0; l<10; l++) { std::vector dim = {i,j,k,l} ; Matrix4D m(i,j,k,n) ; for(size_t a=0; a 
coord = convert_to_coord(m, a) ; m.set(coord[0], coord[1], coord[2], coord[3], a) ; } for(size_t a=0; a dim = {i,j,k,l} ; Matrix4D m(i,j,k,l) ; for(size_t a=0; a m2("./src/Unittests/data/matrix4d_out.mat") ; // any matrix with at least one zero dimension is a null // matrix if(i==0 or j==0 or k==0 or l==0) { CHECK_EQUAL(Matrix4D(0,0,0,0), m2) ; } else { CHECK_EQUAL(m, m2) ; } } } } } } } -*/ + diff --git a/src/Utility/dna_utility.cpp b/src/Utility/dna_utility.cpp index a2f22ad..11af0e0 100644 --- a/src/Utility/dna_utility.cpp +++ b/src/Utility/dna_utility.cpp @@ -1,151 +1,142 @@ #include #include #include -#include +#include #include // seqan::SeqFileIn int dna::map(char base, bool rev_compl) { static bool init = false ; static std::unordered_map hash_map ; static std::unordered_map hash_map_rev ; if(not init) { hash_map['A'] = 0 ; hash_map['a'] = 0 ; hash_map['C'] = 1 ; hash_map['c'] = 1 ; hash_map['G'] = 2 ; hash_map['g'] = 2 ; hash_map['T'] = 3 ; hash_map['t'] = 3 ; hash_map['N'] = 4 ; hash_map['n'] = 4 ; hash_map_rev['A'] = hash_map['T'] ; hash_map_rev['a'] = hash_map['t'] ; hash_map_rev['C'] = hash_map['G'] ; hash_map_rev['c'] = hash_map['g'] ; hash_map_rev['G'] = hash_map['C'] ; hash_map_rev['g'] = hash_map['c'] ; hash_map_rev['T'] = hash_map['A'] ; hash_map_rev['t'] = hash_map['a'] ; hash_map_rev['N'] = hash_map['N'] ; hash_map_rev['n'] = hash_map['n'] ; init = true ; } try { if(rev_compl) { return hash_map_rev.at(base) ; } else { return hash_map.at(base) ; } } // key could not be found catch(std::out_of_range& e) { char msg[256] ; sprintf(msg, "Error! 
Invalid DNA base : %c", base) ; throw std::invalid_argument(msg) ; } } char dna::map(int base, bool rev_compl) { static bool init = false ; static std::unordered_map hash_map ; static std::unordered_map hash_map_rev ; if(not init) { hash_map[0] = 'A' ; hash_map[1] = 'C' ; hash_map[2] = 'G' ; hash_map[3] = 'T' ; hash_map[4] = 'N' ; hash_map_rev[4] = hash_map[4] ; hash_map_rev[3] = hash_map[0] ; hash_map_rev[2] = hash_map[1] ; hash_map_rev[1] = hash_map[2] ; hash_map_rev[0] = hash_map[3] ; init = true ; } try { if(rev_compl) { return hash_map_rev.at(base) ; } else { return hash_map.at(base) ; } } // key could not be found catch(std::out_of_range& e) { char msg[256] ; sprintf(msg, "Error! Invalid DNA code : %i", base) ; throw std::invalid_argument(msg) ; } } int dna::char_to_int(char c, bool rev_compl) { return dna::map(c, rev_compl) ; } -matrix2d_i dna::char_to_int(const matrix2d_c& matrix) +Matrix2D dna::char_to_int(const Matrix2D& matrix) { - size_t n_row = matrix.size() ; - size_t n_col = matrix[0].size() ; + size_t n_row = matrix.get_nrow() ; + size_t n_col = matrix.get_ncol() ; - matrix2d_i data_int(n_row, - vector_i(n_col)) ; + Matrix2D data_int(n_row, n_col) ; for(size_t i=0; i dna::base_composition(const Matrix2D& sequences, bool both_strands) { - // open file - seqan::SeqFileIn file_in; - if (not seqan::open(file_in, file_address.c_str())) - { char msg[4096] ; - sprintf(msg, "Error! Could not open %s", - file_address.c_str()) ; - throw std::invalid_argument(msg) ; - } - - // read - matrix2d_i seq_matrix ; - seqan::CharString id ; - seqan::Dna5String seq ; - size_t i = 0 ; - size_t seq_l = 0; - while(not seqan::atEnd(file_in)) - { seqan::readRecord(id, seq, file_in) ; - // get sequence length - if(i == 0) - { seq_l = seqan::length(seq) ; } - // sequence length should be constant - else if(seqan::length(seq) != seq_l) - { char msg[4096] ; - sprintf(msg, "Error! 
Sequences of variable length in %s", - file_address.c_str()) ; - throw std::invalid_argument(msg) ; + double total = 0. ; + std::vector base_comp(4,0.) ; + + int base_N = dna::map('N') ; + + for(size_t i=0; i adding a count to each + // is equivalent to not changing anything + if(base == base_N) + { continue ; } + else + { base_comp[base] += 1; + total += 1. ; + } + // reverse complement strand + if(both_strands) + { // size_t c_hash_rev = dna::hash(c, true) ; + base_comp[4-base-1] += 1. ; + total += 1. ; + } } - // store - seq_matrix.push_back(vector_i(seq_l)) ; - for(size_t j=0; j +#include namespace dna { /*! * \brief Contains the mapping to convert * DNA characters to integer codes. * Lower and capital characters are accepted. * \param base the character of interest. * \param rev_compl whether the reverse * complement of the character is of interest. * \return the corresponding DNA code. */ int map(char base, bool rev_compl=false) ; /*! * \brief Contains the mapping to convert * DNA code to characters. * Only capital characters are returned. * \param base the code of interest. * \param rev_compl whether the reverse * complement of the code is of interest. * \return the corresponding DNA character. */ char map(int base, bool rev_compl=false) ; /*! * \brief Converts a DNA character (A, C, * G, T) to an integer. * \param c the DNA character of interest. * \return the character integer code. * \throw std::invalid_argument if the * given character is not a valid DNA * character. */ int char_to_int(char c, bool rev_compl= false) ; /*! * \brief Converts a DNA character matrix (A, C, * G, T) to an integer matrix. * The DNA characters are converted using * SequenceLayer::char_to_int(char). * param file_address the address of the file to load. * \return the corresponding int matrix. */ - matrix2d_i char_to_int(const matrix2d_c& matrix) ; + Matrix2D char_to_int(const Matrix2D& matrix) ; /*! * \brief Converts an int DNA code to * a regular DNA character (A, C, G, T). 
* This method is the reverse method of * char_to_int(char). * \param n the DNA code of interest. * \return the DNA character. * \throw std::invalid_argument if the * given int is not a valid DNA * code. */ char int_to_char(int n, bool rev_compl=false) ; /*! - * \brief Loads the content of a fasta file and stores the - * data in a int matrix where each row contains one sequence. - * The sequence in the file should all have the same length. - * The DNA characters are converted using - * SequenceLayer::char_to_int(char) ; - * \param file_address the address of the file to load. - * \throw std::invalid_argument if the file cannot be read, - * if an invalid DNA character is detected in the sequences - * or if the sequences have variable lengths. - * \return a matrix containing the sequences on the rows and - * the characters over the columns. + * \brief Computes the base composition of a set of + * sequences, in integer format, contained in a matrix. + * \param sequences a matrix containing the sequences + * of interest. + * \param both_strands also accounts for the reverse + * complement of the sequences. + * \throw std::invalid_argument if a non-supported + * character is found in the matrix. + * \return a vector of 4 values corresponding to the + * frequencies of A,C,G and T + * respectively. 
*/ - matrix2d_i read_fasta(const std::string& file_address) ; + std::vector base_composition(const Matrix2D& sequences, bool both_strands) ; } #endif // DNA_UTILITY_HPP diff --git a/src/main_cormat.cpp b/src/main_cormat.cpp index 3328b91..1efb02d 100644 --- a/src/main_cormat.cpp +++ b/src/main_cormat.cpp @@ -1,187 +1,261 @@ -#include #include #include #include +#include -#include -#include -#include -#include +#include +#include -using namespace seqan; +#include +#include +#include +#include -template -std::ostream& operator << (std::ostream& stream, const std::vector& v) +class TestTimer { - for(const auto& p : v) - { stream << p << " " ; } - return stream ; + public: + TestTimer(const std::string & name) : name(name), + start(boost::date_time::microsec_clock::local_time()) + { + } + + ~TestTimer() + { + using namespace std; + using namespace boost; + + posix_time::ptime now(date_time::microsec_clock::local_time()); + posix_time::time_duration d = now - start; + + cout << name << " completed in " << d.total_milliseconds() / 1000.0 << + " seconds" << endl; + } + + private: + std::string name; + boost::posix_time::ptime start; +}; + + +void f_vector2d(size_t nrow, size_t ncol) +{ std::vector> m ; + + { TestTimer timer("f_vector2d init") ; + m = std::vector>(nrow, + std::vector(ncol, 0.)) ; + for(size_t i=0; i -std::ostream& operator << (std::ostream& stream, const std::pair& p) -{ - stream << "[" << p.first << " " << p.second << "] " ; - return stream ; +void f_matrix2d(size_t nrow, size_t ncol) +{ Matrix2D m ; + + { TestTimer timer("f_matrix2d init") ; + m = Matrix2D(nrow, ncol, 0.) 
; + } + + { TestTimer timer("f_matrix2d writting") ; + for(size_t i=0; i>> m ; + + { TestTimer timer("f_vector3d init") ; + m = std::vector>>(dim1, + std::vector>(dim2, + std::vector(dim3,0.))) ; + for(size_t i=0; i get_bin_indices(const GenomeRegion& target, - const std::vector& bins) -{ // the bin range and chromosome - int chromosome_idx = bins.front().chromosome_idx ; - int bin_size = bins.front().end - bins.front().start ; - int from = bins.front().start ; - int to = bins.back().end ; - - // not on the same chromosome - if(target.chromosome_idx != chromosome_idx) - { return std::make_pair(0,0) ; } - // target goes over all bins - else if(target.start <= from and - target.end >= to) - { return std::make_pair(0, bins.size()) ; } - // check if overlap - else - { // define whether target limits are inside - int bin_start = -1 ; - int bin_end = -1 ; - - // define whether target limits are inside - bool target_start_in = false ; - bool target_end_in = false ; - if(target.start >= from and - target.start < to) - { target_start_in = true ; } - if(target.end > from and - target.end <= to) - { target_end_in = true ; } - - // start - if(not target_start_in) - { bin_start = 0 ; } - else - { bin_start = (target.start - from) / bin_size ; } - - // end - if(target_start_in and not target_end_in) - { bin_end = bin_start + 1 ; } - else if(not target_start_in and not target_end_in) - { bin_end = 0 ; } - else - { bin_end = ((target.end - 1 - from) / bin_size) + 1 ; } - - return std::make_pair(bin_start, bin_end) ; +void f_matrix3d(size_t dim1, size_t dim2, size_t dim3) +{ Matrix3D m ; + + { TestTimer timer("f_matrix3d init") ; + m = Matrix3D(dim1, dim2, dim3) ; + for(size_t i=0; i get_bin_indices_naive(const GenomeRegion& target, - const std::vector& bins) -{ int bin_start = 0 ; - int bin_end = 0 ; - - GenomeRegion range(bins.front().chromosome, - bins.front().chromosome_idx, - bins.front().start, - bins.back().end) ; - - // no overlap - if(not (target | range)) - { return 
std::make_pair(0,0) ; } - else - { // start - if(target.start < bins.front().start) - { bin_start = 0 ; } - else - { for(int i=0; i< (int)bins.size(); i++) - { if(target.start >= bins[i].start and - target.start < bins[i].end) - { bin_start = i ; - break ; + +void f_vector4d(size_t dim1, size_t dim2, size_t dim3, size_t dim4) +{ std::vector>>> m ; + + { TestTimer timer("f_vector4d init") ; + m = std::vector>>>(dim1, + std::vector>>(dim2, + std::vector>(dim3, + std::vector(dim4, 0)))) ; + for(size_t i=0; i bins.back().end) - { bin_end = bins.size() ; } - else - { - for(int i=0; i<(int)bins.size(); i++) - { if(target.end <= bins[i].end and - target.end > bins[i].start) - { bin_end = i+1 ; - break ; + } + + { TestTimer timer("f_vector4d writting") ; + for(size_t i=0; i get_bin_indices(const GenomeRegion& target, - const std::vector& bins) -{ // the bin range and chromosome - GenomeRegion range(bins.front().chromosome, - bins.front().chromosome_idx, - bins.front().start, - bins.back().end) ; - // no overlap - if(not (target | range)) - { return std::make_pair(0,0) ; } - // overlap - else - { // target goes over all bins - if(target.start <= range.start and - target.end >= range.end) - { return std::make_pair(0, bins.size()) ; } - else - { int bin_start = -1 ; - int bin_end = -1 ; - int bin_size = bins.front().end - bins.front().start ; - - // start - if(target.start <= range.start) - { bin_start = 0 ; } - else - { bin_start = (target.start - range.start) / bin_size ; } - - // end - if(target.end >= range.end) - { bin_end = bins.size() ; } - else - { bin_end = ((target.end - 1 - range.start) / bin_size) + 1 ; } - return std::make_pair(bin_start, bin_end) ; +void f_matrix4d(size_t dim1, size_t dim2, size_t dim3, size_t dim4) +{ Matrix4D m ; + + { TestTimer timer("f_matrix4d init") ; + m = Matrix4D(dim1, dim2, dim3,dim4, 0) ; + for(size_t i=0; i v((size_t)70462*(size_t)23*(size_t)971*(size_t)2) ; + std::cout << "vector allocated" << std::endl ; return 0; } diff --git 
a/src/main_em.cpp b/src/main_em.cpp index 9b5b30e..954a16c 100644 --- a/src/main_em.cpp +++ b/src/main_em.cpp @@ -1,93 +1,92 @@ #include #include #include #include #include #include -#include using namespace std ; void get_size(const vector>>>>& m) { size_t size_d = 0 ; size_t size_m4 = 0 ; size_t size_m3 = 0 ; size_t size_m2 = 0 ; size_t size_m = 0 ; std::cout << "sizeof m : " << sizeof(m) << std::endl ; std::cout << "sizeof m[0] : " << sizeof(m[0]) << std::endl ; std::cout << "sizeof m[0][0] : " << sizeof(m[0][0]) << std::endl ; std::cout << "sizeof m[0][0][0] : " << sizeof(m[0][0][0]) << std::endl ; std::cout << "sizeof m[0][0][0][0] : " << sizeof(m[0][0][0][0]) << std::endl ; for(const auto& m4 : m) { size_m4 += sizeof(m4) ; for(const auto& m3 : m4) { size_m3 += sizeof(m3) ; for(const auto& m2 : m3) { size_m2 += sizeof(m2) ; for(const auto& m : m2) { size_m += sizeof(m) ; size_d += m.capacity() * sizeof(int) ; } } } } std::cout << "size of matrix" << std::endl << "size of m4 : " << size_m4 << std::endl << "size of m3 : " << size_m3 << std::endl << "size of m2 : " << size_m2 << std::endl << "size of m : " << size_m << std::endl << "size of data : " << size_d << std::endl ; } void get_size(const vector>>>& m) { size_t size_d = 0 ; size_t size_m3 = 0 ; size_t size_m2 = 0 ; size_t size_m = 0 ; std::cout << "sizeof m : " << sizeof(m) << std::endl ; std::cout << "sizeof m[0] : " << sizeof(m[0]) << std::endl ; std::cout << "sizeof m[0][0] : " << sizeof(m[0][0]) << std::endl ; std::cout << "sizeof m[0][0][0] : " << sizeof(m[0][0][0]) << std::endl ; for(const auto& m3 : m) { size_m3 += sizeof(m3) ; for(const auto& m2 : m3) { size_m2 += sizeof(m2) ; for(const auto& m : m2) { size_m += sizeof(m) ; size_d += m.capacity() * sizeof(int) ; } } } std::cout << "size of matrix" << std::endl << "size of m3 : " << size_m3 << std::endl << "size of m2 : " << size_m2 << std::endl << "size of m : " << size_m << std::endl << "size of data : " << size_d << std::endl ; } void 
get_size(const vector& v) { std::cout << "sizeof v : " << sizeof(v) << std::endl ; std::cout << "size of data : " << v.size() * sizeof(int) << std::endl ; } int main() { vector>>>> m(2, vector>>>(97998, vector>>(5, vector>(201, vector(2))))) ; get_size(m) ; vector v(97998*5*201*2) ; get_size(v) ; return EXIT_SUCCESS ; } diff --git a/src/main_em2.cpp b/src/main_em2.cpp index 41702c1..5e7e185 100644 --- a/src/main_em2.cpp +++ b/src/main_em2.cpp @@ -1,35 +1,36 @@ #include #include #include -#include +#include +#include int main() { std::string data_path1 = "/local/groux/scATAC-seq/results/10xgenomics_PBMC_5k/" "ctcf_motifs_10e-6_open_bin1bp_read_atac.mat" ; std::string data_path2 = "/local/groux/scATAC-seq/results/10xgenomics_PBMC_5k/" "ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat" ; size_t n_class = 10 ; size_t n_shift = 21 ; bool flip = false ; size_t n_iter = 20 ; std::string seed = "08july2019" ; - EMEngine em_new(std::vector{read_matrix2d_i(data_path1), read_matrix2d_i(data_path2)}, - std::vector{}, + EMEngine em_new(std::vector>{Matrix2D(data_path1), Matrix2D(data_path2)}, + std::vector>{}, n_class, n_iter, n_shift, flip, EMEngine::seeding_codes::RANDOM, seed) ; em_new.classify() ; - std::cout << em_new.get_read_models() << std::endl << std::endl << std::endl ; + em_new.get_read_models() ; return EXIT_SUCCESS ; }