No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Tue, Apr 29, 10:11

View Options

	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_ctcf_motif.sh
	index f5a0b24..d32dffe 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_ctcf_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_ctcf_motif.sh
	@@ -1,52 +1,52 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_1'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=28
	+n_core=32

	# open chromatin
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_1nucl_fragment_center_model.mat'
	file_mod3=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_sequences_model.mat'
	file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMRead --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3
	done

	# 1nucl chromatin
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_open_read_atac_model.mat'
	file_mod3=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_sequences_model.mat'
	file_aic=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMRead --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3
	done

	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_ebf1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_ebf1_motif.sh
	index 431cde7..04c51f3 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_ebf1_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_ebf1_motif.sh
	@@ -1,52 +1,52 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_1'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/ebf1_motifs_10e-6_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/ebf1_motifs_10e-6_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/ebf1_motifs_10e-6_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=28
	+n_core=32

	# open chromatin
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_1nucl_fragment_center_model.mat'
	file_mod3=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_sequences_model.mat'
	file_aic=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMRead --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3
	done

	# 1nucl chromatin
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod2=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_open_read_atac_model.mat'
	file_mod3=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_sequences_model.mat'
	file_aic=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMRead --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3
	done

	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_myc_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_myc_motif.sh
	index ec4656a..c0104d9 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_myc_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_myc_motif.sh
	@@ -1,53 +1,53 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_1'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/myc_motifs_10e-6_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/myc_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/myc_motifs_10e-6_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/myc_motifs_10e-6_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=28
	+n_core=32

	# open chromatin
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_1nucl_fragment_center_model.mat'
	file_mod3=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_sequences_model.mat'
	file_aic=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMRead --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3
	done

	# 1nucl chromatin
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod2=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_open_read_atac_model.mat'
	file_mod3=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_sequences_model.mat'
	file_aic=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMRead --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3
	done


	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_sp1_motif.R b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_sp1_motif.R
	index 5971095..d30e6b1 100644
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_sp1_motif.R
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_sp1_motif.R
	@@ -1,95 +1,95 @@
	setwd(file.path("/", "local", "groux", "scATAC-seq"))

	# libraries
	library(RColorBrewer)

	# functions
	source(file.path("scripts", "functions.R"))

	# the minimum number of classes searched
	k.min = 1
	# the maximum number of classes searched
	k.max = 10

	# path to the images for the logo
	path.a = file.path("res/A.png")
	path.c = file.path("res/C.png")
	path.g = file.path("res/G.png")
	path.t = file.path("res/T.png")

	################## sequence patterns around sp1 motifs ##################

	for(k in k.min:k.max)
	{
	# open chromatin
	data = read.read.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_1",
	sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_model.mat", k)))
	model.open = data$models
	model.prob = data$prob
	data = NULL
	# nucleosomes
	model.nucl = read.read.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_1",
	- sprintf("sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_%dclass_model.mat", k)))$models
	+ sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_1nucl_fragment_center_model.mat", k)))$models
	# sequence
	model.seq = read.sequence.models(file.path("results", "10xgenomics_PBMC_5k_motifs_classification_1",
	sprintf("sp1_motifs_10e-7_open_bin1bp_read_atac_%dclass_sequences_model.mat", k)))$models

	# plot classes
	col = brewer.pal(3, "Set1")
	# X11(width=17, height=10)
	png(filename=file.path("results", "10xgenomics_PBMC_5k_motifs_classification_1",
	sprintf("sp1_motifs_10e-7_classification_open_bin1bp_%dclass.png", k)),
	units="in", res=720, width=18, height=12)
	m = matrix(1:10, nrow=5, ncol=2, byrow=F)
	layout(m)
	# order from most to least probable class
	ord = order(model.prob, decreasing=T)
	ref.open = model.open[ord,, drop=F]
	ref.nucl = model.nucl[ord,, drop=F]
	ref.seq = model.seq[,,ord, drop=F]
	prob = model.prob[ord]
	class = c(1:nrow(ref.open))[ord]
	for(i in 1:nrow(ref.open))
	{ # plot logo
	plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
	main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
	# x-axis
	x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3)
	x.at = (x.lab + ncol(ref.open)) / 2
	axis(1, at=x.at, labels=x.lab)
	# y-axis is [0,1] for min/max signal
	x.at = seq(0, 1, 0.5)
	axis(2, at=x.at, labels=x.at)
	# plot signal (multiplies by 2 because the y-axis goes to 2 bits)
	lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=1, col=col[1])
	lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=1, col=col[2])
	}
	row_n = 1 # row counter
	col_n = 1 # column counter
	for(i in 1:nrow(ref.open))
	{ # plot logo center
	right = 0.5*col_n - 0.01
	left = right - 0.2
	bottom = 1-(row_n*(0.2))+0.05
	top = bottom + 0.15
	par(fig=c(left, right, bottom, top), new=T)
	idx = 380:420
	plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
	# plot signal (multiplies by 2 because the y-axis goes to 2 bits)
	lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=1, col=col[1])
	lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=1, col=col[2])
	# xaxis
	x.at = 1:length(idx)
	axis(1, at=x.at, labels=x.at)
	# yaxis
	x.at = seq(0, 2, by=1)
	axis(2, at=x.at, labels=x.at)
	row_n = row_n + 1
	if(i %% 5 == 0)
	{ col_n = col_n + 1
	row_n = 1
	}
	}
	dev.off()
	}

	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_sp1_motif.sh
	index d9104d0..6c12400 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_sp1_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_1/classification_sp1_motif.sh
	@@ -1,52 +1,52 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_1'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/sp1_motifs_10e-7_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/sp1_motifs_10e-7_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/sp1_motifs_10e-7_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=28
	+n_core=32

	# open chromatin
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_1nucl_fragment_center_model.mat'
	file_mod3=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_sequences_model.mat'
	file_aic=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMRead --read $file_mat_open --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3
	done

	# 1nucl chromatin
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod2=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_open_read_atac_model.mat'
	file_mod3=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_sequences_model.mat'
	file_aic=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMRead --read $file_mat_1nucl --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3
	done

	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ctcf_motif.sh
	index 247a840..013fa9f 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ctcf_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ctcf_motif.sh
	@@ -1,36 +1,36 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_2'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=24
	+n_core=28

	# open chromatin and nucleosomes
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod3=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat'
	file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMJoint --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3

	done
	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ebf1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ebf1_motif.sh
	index 2204ba9..e938ca5 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ebf1_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_ebf1_motif.sh
	@@ -1,36 +1,36 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_2'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/ebf1_motifs_10e-6_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/ebf1_motifs_10e-6_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/ebf1_motifs_10e-6_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=24
	+n_core=28

	# open chromatin and nucleosomes
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'ebf1_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod3=$results_dir/'ebf1_motifs_10e-6_sequences_'$k'class_model.mat'
	file_aic=$results_dir/'ebf1_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMJoint --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3

	done
	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_myc_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_myc_motif.sh
	index 6ea2fcf..345c0cd 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_myc_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_myc_motif.sh
	@@ -1,36 +1,36 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_2'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/myc_motifs_10e-6_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/myc_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/myc_motifs_10e-6_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/myc_motifs_10e-6_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=12
	+n_core=28

	# open chromatin and nucleosomes
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'myc_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod3=$results_dir/'myc_motifs_10e-6_sequences_'$k'class_model.mat'
	file_aic=$results_dir/'myc_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMJoint --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3

	done
	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_sp1_motif.sh
	index 5f561db..ccd2282 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_sp1_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_2/classification_sp1_motif.sh
	@@ -1,35 +1,35 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_2'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/sp1_motifs_10e-7_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/sp1_motifs_10e-7_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/sp1_motifs_10e-7_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=12
	+n_core=28

	# open chromatin and nucleosomes
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_1nucl_bin1bp_fragment_center_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod3=$results_dir/'sp1_motifs_10e-7_sequences_'$k'class_model.mat'
	file_aic=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMJoint --read $file_mat_open --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod3
	done
	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_ctcf_motif.sh
	index 61a8ded..bcaa4f4 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_ctcf_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_ctcf_motif.sh
	@@ -1,38 +1,38 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_3'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
	file_mat_nucl="$data_dir/ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=24
	+n_core=28

	# sequences
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_sequences_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod3=$results_dir/'ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod4=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat'
	file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4
	done
	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_sp1_motif.sh
	index 54f1c72..ca5821c 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_sp1_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_3/classification_sp1_motif.sh
	@@ -1,38 +1,38 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_3'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/sp1_motifs_10e-7_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat"
	file_mat_nucl="$data_dir/sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/sp1_motifs_10e-7_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/sp1_motifs_10e-7_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='21'
	-n_core=24
	+n_core=28

	# sequences
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_sequences_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod3=$results_dir/'sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod4=$results_dir/'sp1_motifs_10e-7_sequences_'$k'class_model.mat'
	file_aic=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4
	done
	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_ctcf_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_ctcf_motif.sh
	index a34f0ec..f9a59cf 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_ctcf_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_ctcf_motif.sh
	@@ -1,38 +1,38 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_4'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/ctcf_motifs_10e-6_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center.mat"
	file_mat_nucl="$data_dir/ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/ctcf_motifs_10e-6_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/ctcf_motifs_10e-6_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='1'
	-n_core=24
	+n_core=28

	# sequences
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_sequences_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'ctcf_motifs_10e-6_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod3=$results_dir/'ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod4=$results_dir/'ctcf_motifs_10e-6_sequences_'$k'class_model.mat'
	file_aic=$results_dir/'ctcf_motifs_10e-6_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4
	done
	diff --git a/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_sp1_motif.sh b/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_sp1_motif.sh
	index cf0d6b3..82b61b4 100755
	--- a/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_sp1_motif.sh
	+++ b/scripts/10xgenomics_PBMC_5k_motifs_classification_4/classification_sp1_motif.sh
	@@ -1,38 +1,38 @@
	# some paths
	## directories
	results_dir='results/10xgenomics_PBMC_5k_motifs_classification_4'
	data_dir='data/10xgenomics_PBMC_5k_motifs'
	## input
	file_mat_open="$data_dir/sp1_motifs_10e-7_open_bin1bp_read_atac.mat"
	file_mat_1nucl="$data_dir/sp1_motifs_10e-7_1nucl_bin1bp_fragment_center.mat"
	file_mat_nucl="$data_dir/sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center.mat"
	file_mat_seq="$data_dir/sp1_motifs_10e-7_sequences.mat"

	## file with seeds
	file_seed=$results_dir'/sp1_motifs_10e-7_seed.txt'

	mkdir -p $results_dir
	touch $file_seed

	# parameters
	n_iter='20'
	n_shift='1'
	-n_core=24
	+n_core=28

	# sequences
	for k in 1 2 3 4 5 6 7 8 9 10
	do
	seed=$(< /dev/urandom tr -dc _A-Z-a-z-0-9 \| head -c${1:-15};echo)
	file_prob=$results_dir/'sp1_motifs_10e-7_open_bin1bp_sequences_'$k'class_prob.mat4d'
	file_mod1=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_model.mat'
	file_mod2=$results_dir/'sp1_motifs_10e-7_1nucl_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod3=$results_dir/'sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center_'$k'class_model.mat'
	file_mod4=$results_dir/'sp1_motifs_10e-7_sequences_'$k'class_model.mat'
	file_aic=$results_dir/'sp1_motifs_10e-7_open_bin1bp_read_atac_'$k'class_aic.txt'
	echo "$file_prob $seed" >> $file_seed
	bin/EMSequence --seq $file_mat_seq --class $k --shift $n_shift --flip --iter $n_iter --seed $seed --thread $n_core --out $file_prob
	bin/ProbToModel --read $file_mat_open --prob $file_prob --thread $n_core 1> $file_mod1
	bin/ProbToModel --read $file_mat_1nucl --prob $file_prob --thread $n_core 1> $file_mod2
	bin/ProbToModel --read $file_mat_nucl --prob $file_prob --thread $n_core 1> $file_mod3
	bin/ProbToModel --seq $file_mat_seq --prob $file_prob --thread $n_core 1> $file_mod4
	done
	diff --git a/src/Clustering/ConsensusSequenceLayer.cpp b/src/Clustering/ConsensusSequenceLayer.cpp
	index 5eab0b1..05c7e1e 100644
	--- a/src/Clustering/ConsensusSequenceLayer.cpp
	+++ b/src/Clustering/ConsensusSequenceLayer.cpp
	@@ -1,377 +1,371 @@
	#include <ConsensusSequenceLayer.hpp>

	#include <vector>
	#include <future> // std::promise, std::future
	#include <stdexcept> // std::invalid_argument
	#include <cmath> // log()
	#include <functional> // std::bind(), std::ref()

	#include <ThreadPool.hpp>
	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <dna_utility.hpp> // dna::base_composition()


	double ConsensusSequenceLayer::score_consensus_subseq(const Matrix3D<double>& cons_seq,
	size_t row,
	size_t start,
	const Matrix2D<double>& model)
	{ size_t ncol = cons_seq.get_dim()[1] ;
	size_t dim3 = cons_seq.get_dim()[2] ;

	if(start > ncol - model.get_nrow())
	{ char msg[4096] ;
	sprintf(msg, "Error! given start (%zu) is too high. Max value is %zu",
	start, ncol - model.get_nrow()) ;
	throw std::invalid_argument(msg) ;
	}
	else if(model.get_nrow() > ncol)
	{ char msg[4096] ;
	sprintf(msg, "Error! given model is longer than sequences (%zu / %zu)",
	model.get_nrow(), ncol) ;
	throw std::invalid_argument(msg) ;
	}
	else if(model.get_ncol() != 4)
	{ char msg[4096] ;
	sprintf(msg, "Error! given model 2nd dimension is not 4 (%zu)",
	model.get_ncol()) ;
	throw std::invalid_argument(msg) ;
	}
	else if(dim3 != 4)
	{ char msg[4096] ;
	sprintf(msg, "Error! given data 3rd dimension is not 4 (%zu)",
	dim3) ;
	throw std::invalid_argument(msg) ;
	}

	size_t from = start ;
	size_t to = from + model.get_nrow() ; // will score [from, to)


	double ll = 0 ;

	for(size_t j_seq=start, j_mod=0; j_seq<to; j_seq++, j_mod++)
	{ double sum = 0. ;
	for(size_t k=0; k<dim3; k++)
	{ sum += (cons_seq(row, j_seq, k) * model(j_mod, k)) ; }
	ll += log(sum) ;
	}

	return ll ;
	}


	ConsensusSequenceLayer::ConsensusSequenceLayer(const Matrix3D<double>& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class)
	: Data3DLayer(data, n_class, n_shift, flip,bckg_class)
	{
	this->n_category = 4 ;

	// initialise the empty model
	this->model = Matrix3D<double>(this->n_class,
	this->l_model,
	this->n_category,
	0.) ;
	// background class
	if(this->bckg_class)
	{ this->create_bckg_class() ; }
	}

	ConsensusSequenceLayer::ConsensusSequenceLayer(Matrix3D<double>&& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class)
	: Data3DLayer(std::move(data), n_class, n_shift, flip, bckg_class)
	{
	this->n_category = 4 ;

	// initialise the empty model
	this->model = Matrix3D<double>(this->n_class,
	this->l_model,
	this->n_category,
	0.) ;

	// background class
	if(this->bckg_class)
	{ this->create_bckg_class() ; }
	}

	ConsensusSequenceLayer::~ConsensusSequenceLayer()
	{ ; }

	void ConsensusSequenceLayer::compute_loglikelihoods(Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	ThreadPool* threads) const
	{
	// dimension checks
	this->check_loglikelihood_dim(loglikelihood) ;
	this->check_loglikelihood_max_dim(loglikelihood_max) ;

	// compute the prob model and the prob reverse-complement model
	std::vector<Matrix2D<double>> model(this->n_class,
	Matrix2D<double>(this->l_model,
	this->n_category,
	0.)) ;
	std::vector<Matrix2D<double>> model_rev = model ;
	for(size_t i=0; i<this->n_class; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ // forward
	model[i](j,k) = this->model(i,j,k) ;
	// reverse
	model_rev[i](this->l_model-j-1,this->n_category-k-1)
	= this->model(i,j,k) ;
	}
	}
	}

	// don't parallelize
	if(threads == nullptr)
	{ std::promise<bool> promise ;
	std::future<bool> future = promise.get_future() ;
	this->compute_loglikelihoods_routine(0, this->n_dim1,
	loglikelihood,
	loglikelihood_max,
	model,
	model_rev,
	promise) ;
	future.get() ;
	}
	// parallelize
	else
	{ size_t n_threads = threads->getNThread() ;
	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_dim1, n_threads) ;

	// get promises and futures
	// the function run by the threads will simply fill the promise with
	// "true" to indicate that they are done
	std::vector<std::promise<bool>> promises(n_threads) ;
	std::vector<std::future<bool>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }
	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	threads->addJob(std::move(
	std::bind(&ConsensusSequenceLayer::compute_loglikelihoods_routine,
	this,
	slice.first,
	slice.second,
	std::ref(loglikelihood),
	std::ref(loglikelihood_max),
	std::ref(model),
	std::ref(model_rev),
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	for(auto& future : futures)
	{ future.get() ; }
	// -------------------------- threads stop ---------------------------
	}
	}

	void ConsensusSequenceLayer::compute_loglikelihoods_routine(size_t from,
	size_t to,
	Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	const std::vector<Matrix2D<double>>& model,
	const std::vector<Matrix2D<double>>& model_rev,
	std::promise<bool>& done) const
	{
	// compute log likelihood
	for(size_t i=from; i<to; i++)
	{
	// set max to min possible value
	loglikelihood_max[i] = std::numeric_limits<double>::lowest() ;

	for(size_t j=0; j<this->n_class; j++)
	{
	for(size_t s=0; s<this->n_shift; s++)
	{ // forward strand
	- { double ll_fw = std::max(score_consensus_subseq(this->data, i, s, model[j]),
	- ConsensusSequenceLayer::p_min_log);
	- // loglikelihood(i,j,s,flip_states::FORWARD) = ll_fw ;
	- // apply lower bound here -> log(1e-100)
	+ { double ll_fw = score_consensus_subseq(this->data, i, s, model[j]) ;
	loglikelihood(i,j,s,flip_states::FORWARD) = ll_fw ;
	// keep track of max per row
	if(ll_fw > loglikelihood_max[i])
	{ loglikelihood_max[i] = ll_fw ; }

	}
	// reverse
	if(this->flip)
	- { double ll_rev = std::max(score_consensus_subseq(this->data, i, s, model_rev[j]),
	- ConsensusSequenceLayer::p_min_log) ;
	- // loglikelihood(i,j,s,flip_states::REVERSE) = ll_rev ;
	- // apply lower bound here -> log(1e-100)
	+ { double ll_rev = score_consensus_subseq(this->data, i, s, model_rev[j]) ;
	loglikelihood(i,j,s,flip_states::REVERSE) = ll_rev ;
	// keep track of max per row
	if(ll_rev > loglikelihood_max[i])
	{ loglikelihood_max[i] = ll_rev ; }
	}
	}
	}
	}
	done.set_value(true) ;
	}

	void ConsensusSequenceLayer::update_model(const Matrix4D<double>& posterior_prob,
	ThreadPool* threads)
	{ // don't parallelize
	if(threads == nullptr)
	{ std::promise<Matrix3D<double>> promise ;
	std::future<Matrix3D<double>> future = promise.get_future() ;
	this->update_model_routine(0,
	this->n_dim1,
	posterior_prob,
	promise) ;
	// this->model = future.get() ;
	auto model = future.get() ;
	size_t n_class_to_update = this->n_class - this->bckg_class ;
	for(size_t i=0; i<n_class_to_update; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) = model(i,j,k) ; }
	}
	}
	}
	// parallelize
	else
	{ size_t n_threads = threads->getNThread() ;
	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_dim1, n_threads) ;

	// get promises and futures
	// the function run by the threads will simply fill the promise with
	// "true" to indicate that they are done
	std::vector<std::promise<Matrix3D<double>>> promises(n_threads) ;
	std::vector<std::future<Matrix3D<double>>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }
	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	threads->addJob(std::move(
	std::bind(&ConsensusSequenceLayer::update_model_routine,
	this,
	slice.first,
	slice.second,
	std::ref(posterior_prob),
	std::ref(promises[i])))) ;
	}
	// reinitialise the model
	size_t n_class_to_update = this->n_class - this->bckg_class ;
	for(size_t i=0; i<n_class_to_update; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) = 0. ; }
	}
	}
	// wait until all threads are done working
	// and update the model
	for(auto& future : futures)
	{ Matrix3D<double> model_part = future.get() ;
	// for(size_t i=0; i<this->n_class; i++)
	for(size_t i=0; i<n_class_to_update; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) += model_part(i,j,k) ; }
	}
	}
	}
	// -------------------------- threads stop ---------------------------
	}
	// make sure to have no 0 values
	for(size_t i=0; i<this->n_class; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) =
	std::max(this->model(i,j,k), ConsensusSequenceLayer::p_min) ;
	}
	}
	}
	// normalize to get probs
	for(size_t i=0; i<this->n_class; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ double sum = 0. ;
	for(size_t k=0; k<this->n_category; k++)
	{ sum += this->model(i,j,k) ; }
	for(size_t k=0; k<this->n_category; k++)
	{ double p = this->model(i,j,k) / sum ;
	this->model(i,j,k) = p ;
	}
	}
	}
	}

	void ConsensusSequenceLayer::update_model_routine(size_t from,
	size_t to,
	const Matrix4D<double>& posterior_prob,
	std::promise<Matrix3D<double>>& promise) const
	{ // dimension checks
	this->check_posterior_prob_dim(posterior_prob) ;

	Matrix3D<double> model(this->n_class,
	this->l_model,
	this->n_category,
	0.) ;

	size_t n_class_to_update = this->n_class - this->bckg_class ;

	for(size_t k=0; k < n_class_to_update; k++)
	{ for(size_t s=0; s<this->n_shift; s++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ // base prob on fw and rv strand
	vector_d base_prob_fw(this->n_category, 0.) ;
	vector_d base_prob_rv(this->n_category, 0.) ;
	for(size_t i=from; i<to; i++)
	{ for(size_t base=0, base_rv=3; base<this->n_dim3; base++, base_rv--)
	{ // --------------- forward ---------------
	{ base_prob_fw[base] +=
	this->data(i,s+j,base) *
	posterior_prob(i,k,s,ConsensusSequenceLayer::FORWARD) ;
	}
	// --------------- reverse ---------------
	if(this->flip)
	{
	base_prob_rv[base_rv] +=
	this->data(i,s+j,base) *
	posterior_prob(i,k,s,ConsensusSequenceLayer::REVERSE) ;
	}
	}
	}
	// update this position of the model
	for(size_t i=0,i_rev=base_prob_fw.size()-1; i<base_prob_fw.size(); i++,i_rev--)
	{ // --------------- forward ---------------
	{ model(k,j,i) += base_prob_fw[i] ; }
	// --------------- reverse ---------------
	if(this->flip)
	{ model(k,this->l_model-j-1,i) += base_prob_rv[i] ; }
	}
	}
	}
	}
	promise.set_value(model) ;
	}

	void ConsensusSequenceLayer::create_bckg_class()
	{ // sequence composition
	std::vector<double> base_comp =
	dna::base_composition(this->data,
	this->flip) ;
	// set last class as background class
	for(size_t i=0; i<this->n_category; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ this->model(this->n_class-1,j,i) = base_comp[i] ; }
	}
	}
	diff --git a/src/Clustering/ConsensusSequenceLayer.hpp b/src/Clustering/ConsensusSequenceLayer.hpp
	index 5eb71e8..c0a18b8 100644
	--- a/src/Clustering/ConsensusSequenceLayer.hpp
	+++ b/src/Clustering/ConsensusSequenceLayer.hpp
	@@ -1,196 +1,202 @@
	#ifndef CONSENSUSSEQUENCELAYER_HPP
	#define CONSENSUSSEQUENCELAYER_HPP

	#include <Data3DLayer.hpp>

	#include <vector>
	#include <future> // std::promise

	#include <ThreadPool.hpp>
	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>


	typedef std::vector<double> vector_d ;


	class ConsensusSequenceLayer : public Data3DLayer
	{
	public:
	/*!
	* \brief Computes the log-likelihood of the sub-
	* sequence - stored in a given row (1st dimension) -
	* and starting at the offset <col> (2nd dimension) in
	* the given sequence matrix.
	* The subsequence length is determined by the model
	* lenght.
	* \param cons_seq a probability matrix containing
	* the consensus sequence. Its dimensions are :
	* 1st the different sequences
	* 2nd the sequences length
	* 3rd 4 for A,C,G,T.
	* \param row the row containing the sequence of
	* interest.
	* \param col the index at which the sub-sequence
	* is starting (1st index inside the subsequence
	* of interest).
	* \param model a model containing the probability
	* model.
	* \return the log-likelihood of the sub-sequence
	* given the model.
	* \throw std::invalid_argument if 1) the offset is
	* invalid, 2) the sequence and the model have
	* incompatible dimensions or 3) the model 2n dimension
	* is not 4 (A,C,G,T).
	*/
	static double score_consensus_subseq(const Matrix3D<double>& cons_seq,
	size_t row,
	size_t col,
	const Matrix2D<double>& model) ;
	public:
	/*!
	* \brief Constructs an object with the
	* given data and an empty (0 values)
	* model.
	* \param data the data.
	* \param n_class the number of classes
	* of the model.
	* \param n_shift the number of shift
	* states of the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	ConsensusSequenceLayer(const Matrix3D<double>& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class) ;

	/*!
	* \brief Constructs an object with the
	* given data and an empty (0 values)
	* model.
	* \param data the data.
	* \param n_class the number of classes
	* of the model.
	* \param n_shift the number of shift
	* states of the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	ConsensusSequenceLayer(Matrix3D<double>&& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class) ;

	/*!
	* \brief Destructor.
	*/
	virtual ~ConsensusSequenceLayer() ;

	/*!
	* \brief Computes the log likelihood of the data
	* given the current model parameters.
	* \param logliklihood a matrix to store the
	* results. It should have the following dimensions :
	* 1st : same as the data number of row
	* 2nd : same as the model number of classes
	* 3rd : same as the number of shifts
	* 4th : same as the number of flip states
	* \param loglikelihood_max a vector containing the
	* max value for each row of loglikelihood.
	* Its length should be equal to the data row number.
	* \throw std::invalid_argument if the dimensions are
	* incorrect.
	*/
	virtual void compute_loglikelihoods(Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	ThreadPool* threads=nullptr) const override ;

	/*!
	* \brief Updates the model given the posterior
	* probabilities (the probabilities of each row
	* in the data to be assigned to each class,
	* for each shift and flip state).
	+ * 0 values inside the model are forbidden.
	+ * The lowest possible value inside the model
	+ * is SequenceLayer::p_min.
	* \param posterior_prob the data assignment probabilities to
	* the different classes.
	*/
	virtual void update_model(const Matrix4D<double>& posterior_prob,
	ThreadPool* threads=nullptr) override ;

	protected:
	/*!
	* \brief The routine that effectively performs the
	* loglikelihood computations.
	* \param from the index of the first row of the data
	* to considered.
	* \param to the index of the past last row of the data
	* to considered.
	* \param loglikelihood a matrix to store the
	* results. It should have the following dimensions :
	* 1st : same as the data number of row
	* 2nd : same as the model number of classes
	* 3rd : same as the number of shifts
	* 4th : same as the number of flip states
	* \param loglikelihood_max a vector containing the
	* max value for each row of log_likelihood.
	* Its length should be equal to the data row number.
	* \param model_log a vector containing the matrices with
	* the log values of the model for each class.
	* \param model_log_rev a vector containing the matrices with
	* the log values of the reverse strand model for each class
	* (the 1st position in the model becomes the last in the
	* reverse model and probabilities are swapped A<->T and C<->G).
	* \param done a promise to be filled when the routine
	* is done running.
	*/
	void compute_loglikelihoods_routine(size_t from,
	size_t to,
	Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	const std::vector<Matrix2D<double>>& model_log,
	const std::vector<Matrix2D<double>>& model_log_rev,
	std::promise<bool>& done) const ;

	/*!
	* \brief The routine that effectively update the model.
	+ * 0 values inside the model are forbidden.
	+ * The lowest possible value inside the model
	+ * is SequenceLayer::p_min.
	* \param from the index of the first row of the
	* posterior probabilities to considered.
	* \param to the index of the past last row of the
	* posterior probabilities to considered.
	* \param posterior_prob the data assignment probabilities
	* to the different classes.
	* \param
	* \param done a promise containing the partial model
	* computed from the given data slice. If several routines
	* work together at updating the model, they need to be
	* summed and normalized (by the column sum) to get the
	* final model.
	*/
	void update_model_routine(size_t from,
	size_t to,
	const Matrix4D<double>& posterior_prob,
	std::promise<Matrix3D<double>>& done) const ;

	/*!
	* \brief Sets the last class as background class with the
	* mean base probababilities of the data at each position.
	*/
	void create_bckg_class() ;


	protected:




	} ;


	#endif // CONSENSUSSEQUENCELAYER_HPP
	diff --git a/src/Clustering/DataLayer.cpp b/src/Clustering/DataLayer.cpp
	index a1d8de4..6ab5ff5 100644
	--- a/src/Clustering/DataLayer.cpp
	+++ b/src/Clustering/DataLayer.cpp
	@@ -1,60 +1,60 @@
	#include <DataLayer.hpp>

	#include <stdexcept> // std::invalid_argument
	#include <cmath> // log()

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <ThreadPool.hpp>

	DataLayer::DataLayer()
	{ ; }

	DataLayer::DataLayer(size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class)
	: flip(flip),
	n_class(n_class),
	n_shift(n_shift),
	n_flip(flip + 1),
	bckg_class(bckg_class)
	{ // models cannot be initialise here
	// as the number of categories depend
	// on the exact class
	}

	DataLayer::DataLayer(const Matrix3D<double>& model,
	bool flip,
	bool bckg_class)
	: model(model),
	flip(flip),
	n_class(this->model.get_dim()[0]),
	l_model(this->model.get_dim()[1]),
	n_category(this->model.get_dim()[2]),
	n_flip(flip + 1),
	bckg_class(bckg_class)
	{ ; }

	DataLayer::DataLayer(Matrix3D<double>&& model,
	bool flip,
	bool bckg_class)
	: model(model),
	flip(flip),
	n_class(this->model.get_dim()[0]),
	l_model(this->model.get_dim()[1]),
	n_category(this->model.get_dim()[2]),
	n_flip(flip + 1),
	bckg_class(bckg_class)
	{ ; }

	DataLayer::~DataLayer()
	{ ; }

	Matrix3D<double> DataLayer::get_model() const
	{ return this->model ; }


	-const double DataLayer::p_min = 1e-100 ;
	+const double DataLayer::p_min = 1e-300 ;
	const double DataLayer::p_min_log = log(DataLayer::p_min) ;
	diff --git a/src/Clustering/EMConsensusSequence.cpp b/src/Clustering/EMConsensusSequence.cpp
	index 9cac9ee..1dd3343 100644
	--- a/src/Clustering/EMConsensusSequence.cpp
	+++ b/src/Clustering/EMConsensusSequence.cpp
	@@ -1,293 +1,294 @@
	#include <EMConsensusSequence.hpp>

	#include <string>
	#include <vector>
	#include <future> // std::promise, std::future
	#include <utility> // std::pair, std::move()
	#include <functional> // std::bind(), std::ref()

	#include <ConsensusSequenceLayer.hpp> // SequenceLayer
	#include <RandomNumberGenerator.hpp> // getRandomNumberGenerator()
	#include <ConsoleProgressBar.hpp> // ConsoleProgressBar
	#include <ThreadPool.hpp> // ThreadPool
	#include <dna_utility.hpp> // dna::base_composition()


	EMConsensusSequence::EMConsensusSequence(const Matrix3D<double>& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(seq_matrix.get_dim()[0],
	seq_matrix.get_dim()[1],
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	loglikelihood_max(n_row, 0.),
	cseq_layer(nullptr)
	{
	this->loglikelihood_max = vector_d(n_row, 0.) ;

	// initialise post prob randomly
	// getRandomGenerator(seed) ;
	this->set_post_prob_random(seed) ;

	// data and models
	this->cseq_layer = new ConsensusSequenceLayer(seq_matrix,
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class) ;

	// intialise the models with the post prob
	this->cseq_layer->update_model(this->post_prob,
	this->threads) ;
	}

	EMConsensusSequence::EMConsensusSequence(Matrix3D<double>&& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(seq_matrix.get_dim()[0],
	seq_matrix.get_dim()[1],
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	loglikelihood_max(n_row, 0.),
	cseq_layer(nullptr)
	{
	this->loglikelihood_max = vector_d(n_row, 0.) ;

	// initialise post prob randomly
	// getRandomGenerator(seed) ;
	this->set_post_prob_random(seed) ;

	// data and models
	this->cseq_layer = new ConsensusSequenceLayer(std::move(seq_matrix),
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class) ;

	// intialise the models with the post prob
	this->cseq_layer->update_model(this->post_prob,
	this->threads) ;
	}

	EMConsensusSequence::~EMConsensusSequence()
	{ if(this->cseq_layer != nullptr)
	{ delete this->cseq_layer ;
	this->cseq_layer = nullptr ;
	}
	if(this->threads != nullptr)
	{ this->threads->join() ;
	delete this->threads ;
	this->threads = nullptr ;
	}
	}

	Matrix3D<double> EMConsensusSequence::get_sequence_models() const
	{ return this->cseq_layer->get_model() ; }

	EMConsensusSequence::exit_codes EMConsensusSequence::classify()
	{
	size_t bar_update_n = this->n_iter ;
	ConsoleProgressBar bar(std::cerr, bar_update_n, 60, "classifying") ;

	// optimize the partition
	for(size_t n_iter=0; n_iter<this->n_iter; n_iter++)
	{ // E-step
	this->compute_loglikelihood() ;
	this->compute_post_prob() ;
	// M-step
	this->compute_class_prob() ;
	this->update_models() ;
	this->center_post_state_prob() ;
	bar.update() ;
	}
	bar.update() ; std::cerr << std::endl ;
	return EMConsensusSequence::exit_codes::ITER_MAX ;
	}

	void EMConsensusSequence::compute_loglikelihood()
	{ // compute the loglikelihood
	this->cseq_layer->compute_loglikelihoods(this->loglikelihood,
	this->loglikelihood_max,
	this->threads) ;
	// rescale the values
	// don't parallelize
	if(this->threads == nullptr)
	{ std::promise<bool> promise ;
	std::future<bool> future = promise.get_future() ;
	this->compute_loglikelihood_routine(0,
	this->n_row,
	promise) ;
	future.get() ;
	}
	// parallelize
	else
	{ size_t n_threads = this->threads->getNThread() ;

	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row,n_threads) ;

	// get promises and futures
	std::vector<std::promise<bool>> promises(n_threads) ;
	std::vector<std::future<bool>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }

	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	this->threads->addJob(std::move(
	std::bind(&EMConsensusSequence::compute_loglikelihood_routine,
	this,
	slice.first,
	slice.second,
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	for(auto& future : futures)
	{ future.get() ; }
	// -------------------------- threads stop ---------------------------
	}
	}

	void EMConsensusSequence::compute_loglikelihood_routine(size_t from,
	size_t to,
	std::promise<bool>& done)
	{
	// rescale the values
	for(size_t i=from; i<to; i++)
	{ for(size_t j=0; j<this->n_class; j++)
	{ for(size_t k=0; k<this->n_shift; k++)
	{ for(size_t l=0; l<this->n_flip; l++)
	{ this->loglikelihood(i,j,k,l) =
	- (this->loglikelihood(i,j,k,l) -
	- this->loglikelihood_max[i]) ;
	+ std::max(this->loglikelihood(i,j,k,l) -
	+ this->loglikelihood_max[i],
	+ ConsensusSequenceLayer::p_min_log) ;
	}
	}
	}
	}
	done.set_value(true) ;
	}

	void EMConsensusSequence::compute_post_prob()
	{ // don't parallelize
	if(this->threads == nullptr)
	{ std::promise<vector_d> promise ;
	std::future<vector_d> future = promise.get_future() ;
	this->compute_post_prob_routine(0, this->n_row, promise) ;
	// compute the sum of post prob and the per class sum of post prob
	// from the partial results computed on each slice
	this->post_prob_tot = 0. ;
	this->post_prob_colsum = future.get() ;
	for(const auto& prob : this->post_prob_colsum)
	{ this->post_prob_tot += prob ; }
	}
	// parallelize
	else
	{ size_t n_threads = this->threads->getNThread() ;

	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row,n_threads) ;

	// get promises and futures
	// the function run by the threads will compute
	// the partial sum per class of post_prob for the given slice
	// this should be used to compute the complete sum of post_prob
	// and the complete sum per class of post_prob
	std::vector<std::promise<vector_d>> promises(n_threads) ;
	std::vector<std::future<vector_d>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }

	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	this->threads->addJob(std::move(
	std::bind(&EMConsensusSequence::compute_post_prob_routine,
	this,
	slice.first,
	slice.second,
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	// compute the sum of post prob and the per class sum of post prob
	// from the partial results computed on each slice
	this->post_prob_tot = 0. ;
	this->post_prob_colsum = vector_d(this->n_class, 0.) ;
	for(auto& future : futures)
	{ auto probs = future.get() ;
	for(size_t i=0; i<this->n_class; i++)
	{ double prob = probs[i] ;
	this->post_prob_colsum[i] += prob ;
	this->post_prob_tot += prob ;
	}
	}
	// -------------------------- threads stop ---------------------------
	}
	}


	void EMConsensusSequence::compute_post_prob_routine(size_t from,
	size_t to,
	std::promise<vector_d>& post_prob_colsum)
	{ vector_d colsums(this->n_class, 0.) ;

	// reset grand total
	// this->post_prob_tot = 0 ;
	// this->post_prob_colsum = vector_d(n_class, 0) ;

	// post prob
	for(size_t i=from; i<to; i++)
	{ // reset row sum to 0
	this->post_prob_rowsum[i] = 0. ;
	for(size_t n_class=0; n_class<this->n_class; n_class++)
	{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
	{ for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
	{
	double p = exp(this->loglikelihood(i,n_class,n_shift,n_flip)) *
	this->post_state_prob(n_class,n_shift,n_flip) ;
	this->post_prob(i,n_class,n_shift,n_flip) = p ;
	this->post_prob_rowsum[i] += p ;
	}
	}
	}
	// normalize
	for(size_t n_class=0; n_class<this->n_class; n_class++)
	{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
	{ for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
	{
	double p = std::max(this->post_prob(i,n_class,n_shift,n_flip) /
	this->post_prob_rowsum[i],
	ConsensusSequenceLayer::p_min) ;
	this->post_prob(i,n_class,n_shift,n_flip) = p ;
	colsums[n_class] += p ;
	}
	}
	}
	}
	post_prob_colsum.set_value(colsums) ;
	}

	void EMConsensusSequence::update_models()
	{ this->cseq_layer->update_model(this->post_prob,
	this->threads) ;
	}
	diff --git a/src/Clustering/EMConsensusSequence.hpp b/src/Clustering/EMConsensusSequence.hpp
	index 8ba881c..01ff753 100644
	--- a/src/Clustering/EMConsensusSequence.hpp
	+++ b/src/Clustering/EMConsensusSequence.hpp
	@@ -1,184 +1,199 @@
	#ifndef EMCONSENSUSSEQUENCE_HPP
	#define EMCONSENSUSSEQUENCE_HPP

	#include <EMBase.hpp>

	#include <vector>
	#include <string>
	#include <future> // std::promise

	#include <Matrix3D.hpp>
	#include <ConsensusSequenceLayer.hpp>


	typedef std::vector<double> vector_d ;


	class EMConsensusSequence : public EMBase
	{
	public:
	/*!
	* \brief Constructs an object to partition the
	* given consensus sequences (rows) according to
	* their motif content.
	* The sequences models are initialised randomly.
	* \param sequence_matrix a matrix containing the
	* consensus sequences in a probability matrix.
	* Its dimensions are :
	* 1st the number of consensus sequences
	* 2nd the length of the consensus sequences
	* 3rd 4 for A,C,G,T
	* The sums over the 1st and 2nd dimensions should
	* be 1. The overall sum of the matrix values should
	* be the st dimension.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model the background
	* by setting all its parameters, at all positions, to the
	* background base probabilties. Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMConsensusSequence(const Matrix3D<double>& sequence_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;
	/*!
	* \brief Constructs an object to partition the
	* given consensus sequences (rows) according to
	* their motif
	* content.
	* The sequences models are initialised randomly.
	* \param sequence_matrix a matrix containing the
	* consensus sequences in a probability matrix.
	* Its dimensions are :
	* 1st the number of consensus sequences
	* 2nd the length of the consensus sequences
	* 3rd 4 for A,C,G,T
	* The sums over the 1st and 2nd dimensions should
	* be 1. The overall sum of the matrix values should
	* be the st dimension.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model the background
	* by setting all its parameters, at all positions, to the
	* background base probabilties. Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMConsensusSequence(Matrix3D<double>&& sequence_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	EMConsensusSequence(const EMConsensusSequence& other) = delete ;

	/*!
	* \brief Destructor.
	*/
	virtual ~EMConsensusSequence() override ;

	/*!
	* \brief Returns the class sequence model.
	* \return the class sequence model.
	*/
	Matrix3D<double> get_sequence_models() const ;

	/*!
	* \brief Runs the sequence model optimization and
	* the data classification.
	* \return a code indicating how the optimization
	* ended.
	*/
	virtual EMConsensusSequence::exit_codes classify() override ;

	private:

	/*!
	* \brief Computes the data log likelihood given the
	* current models, for all layers and the joint
	* likelihood for each state (the sum of the layer
	* likelihoods for all layers, for a given state).
	+ * To avoid numerical issues when computing posterior
	+ * probabilities, the lowest possible value authorized
	+ * as log likelihood is ConsensusSequenceLayer::p_min_log.
	+ * Any value below is replaced by this one.
	*/
	virtual void compute_loglikelihood() override ;

	/*!
	* \brief This is a routine of compute_loglikelihood().
	* This method rescales the loglikelihood values by
	* substacting to each value the maximum loglikelihood
	* value found in the same data row.
	- * This method
	+ * To avoid numerical issues when computing posterior
	+ * probabilities, the lowest possible value authorized
	+ * as log likelihood is ConsensusSequenceLayer::p_min_log.
	+ * Any value below is replaced by this one.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done a promise to fill when the method
	* is done.
	*/
	void compute_loglikelihood_routine(size_t from,
	size_t to,
	std::promise<bool>& done) ;

	/*!
	* \brief Computes the data posterior probabilties.
	+ * To avoid numerical issues the lowest possible
	+ * value authorized as posterior probability is
	+ * ConsensusSequenceLayer::p_min. Any value below
	+ * is replaced by this one.
	*/
	virtual void compute_post_prob() override ;

	/*!
	* \brief The routine that effectively computes
	* the posterior probabilties.
	+ * To avoid numerical issues the lowest possible
	+ * value authorized as posterior probability is
	+ * ConsensusSequenceLayer::p_min. Any value below
	+ * is replaced by this one.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done the partial column (over the classes)
	* sum of posterior probabilities. If several routines
	* are running together, the colsums are retrieved by
	* summing up the vectors together.
	*/
	void compute_post_prob_routine(size_t from,
	size_t to,
	std::promise<vector_d>& post_prob_colsum) ;

	/*!
	* \brief Update the data models for all layers, given
	* the current posterior and class probabilities.
	*/
	virtual void update_models() override ;

	/*!
	* \brief the max loglikelihood value for
	* each data row.
	*/
	std::vector<double> loglikelihood_max ;
	/*!
	* \brief A pointer to the object managing
	* the data and their model.
	*/
	ConsensusSequenceLayer* cseq_layer ;

	} ;

	#endif // EMCONSENSUSSEQUENCE_HPP
	diff --git a/src/Clustering/EMJoint.cpp b/src/Clustering/EMJoint.cpp
	index 7ff0a16..c15a79b 100644
	--- a/src/Clustering/EMJoint.cpp
	+++ b/src/Clustering/EMJoint.cpp
	@@ -1,837 +1,814 @@

	#include <EMJoint.hpp>

	#include <string>
	#include <vector>
	#include <stdexcept>
	#include <future> // std::promise, std::future
	#include <utility> // std::pair, std::move()
	#include <functional> // std::bind(), std::ref()
	#include <cmath> // exp()

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <ReadLayer.hpp>
	#include <SequenceLayer.hpp>
	#include <ThreadPool.hpp>
	#include <RandomNumberGenerator.hpp> // getRandomNumberGenerator()
	#include <ConsoleProgressBar.hpp> // ConsoleProgressBar

	template<class T>
	std::ostream& operator << (std::ostream& stream, const std::vector<T>& v)
	{ for(const auto& x : v)
	{ stream << x << " " ; }
	return stream ;
	}

	EMJoint::EMJoint(const std::vector<Matrix2D<int>>& read_matrices,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(read_matrices[0].get_nrow(),
	read_matrices[0].get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	n_layer(read_matrices.size()),
	loglikelihood_layer(n_layer,
	Matrix4D<double>(this->n_row,
	this->n_class,
	this->n_shift,
	this->n_flip,
	0.)),
	loglikelihood_max(this->n_layer,
	vector_d(this->n_row, 0.)),
	read_layers(),
	seq_layer(nullptr),
	consseq_layer(nullptr)
	{
	// check data matrices and their dimensions
	if(this->n_layer == 0)
	{ throw std::invalid_argument("Error! No data layer given!") ; }
	for(const auto& matrix : read_matrices)
	{ if(matrix.get_nrow() != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! Read layers have variable row numbers "
	"(%zu and %zu)!",
	matrix.get_nrow(), this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(matrix.get_ncol() != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! Read layers have variable column numbers "
	"(%zu and %zu)!",
	matrix.get_ncol(), this->n_col) ;
	throw std::invalid_argument(msg) ;
	}
	}

	// initialise post prob randomly
	this->set_post_prob_random(seed) ;

	// data and models
	// create read layer and initialise the models from the post prob
	for(auto& matrix : read_matrices)
	{ // create the layer
	this->read_layers.push_back(new ReadLayer(matrix,
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class,
	this->threads)) ;
	this->read_layers.back()->update_model(this->post_prob,
	this->threads) ;
	}
	}

	EMJoint::EMJoint(std::vector<Matrix2D<int>>&& read_matrices,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(read_matrices[0].get_nrow(),
	read_matrices[0].get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	n_layer(read_matrices.size()),
	loglikelihood_layer(n_layer,
	Matrix4D<double>(this->n_row,
	this->n_class,
	this->n_shift,
	this->n_flip,
	0.)),
	loglikelihood_max(this->n_layer,
	vector_d(this->n_row, 0.)),
	read_layers(),
	seq_layer(nullptr),
	consseq_layer(nullptr)
	{
	// check data matrices and their dimensions
	if(this->n_layer == 0)
	{ throw std::invalid_argument("Error! No data layer given!") ; }
	for(const auto& matrix : read_matrices)
	{ if(matrix.get_nrow() != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! Read layers have variable row numbers "
	"(%zu and %zu)!",
	matrix.get_nrow(), this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(matrix.get_ncol() != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! Read layers have variable column numbers "
	"(%zu and %zu)!",
	matrix.get_ncol(), this->n_col) ;
	throw std::invalid_argument(msg) ;
	}
	}

	// initialise post prob randomly
	this->set_post_prob_random(seed) ;

	// data and models
	// create read layer and initialise the models from the post prob
	for(auto& matrix : read_matrices)
	{
	// create the layer
	this->read_layers.push_back(new ReadLayer(std::move(matrix),
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class,
	this->threads)) ;
	this->read_layers.back()->update_model(this->post_prob,
	this->threads) ;
	}
	}

	EMJoint::EMJoint(const std::vector<Matrix2D<int>>& read_matrices,
	const Matrix2D<int>& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(read_matrices[0].get_nrow(),
	read_matrices[0].get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	n_layer(read_matrices.size()+1),
	loglikelihood_layer(this->n_layer,
	Matrix4D<double>(this->n_row,
	this->n_class,
	this->n_shift,
	this->n_flip,
	0.)),
	loglikelihood_max(this->n_layer,
	vector_d(this->n_row, 0.)),
	read_layers(),
	seq_layer(nullptr),
	consseq_layer(nullptr)
	{ // check data matrices and their dimensions
	for(const auto& matrix : read_matrices)
	{ if(matrix.get_nrow() != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! A read matrix row number is different than expected "
	"(%zu instead of %zu)!",
	matrix.get_nrow(), this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(matrix.get_ncol() != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! A read matrix column number is different than expected "
	"(%zu instead of %zu)!",
	matrix.get_ncol(), this->n_col) ;
	throw std::invalid_argument(msg) ;
	}
	}
	if(seq_matrix.get_nrow() != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! The sequence matrix row number is different than expected "
	"(%zu instead of %zu)!",
	seq_matrix.get_nrow(), this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(seq_matrix.get_ncol() != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! The sequence matrix column number is different than expected "
	"(%zu instead of %zu)!",
	seq_matrix.get_ncol(), this->n_col) ;
	throw std::invalid_argument(msg) ;
	}

	// initialise post prob randomly
	// getRandomGenerator(seed) ;
	this->set_post_prob_random(seed) ;

	// data and models
	// create read layer and initialise the models from the post prob
	for(auto& matrix : read_matrices)
	{ // create the layer
	this->read_layers.push_back(new ReadLayer(matrix,
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class,
	this->threads)) ;
	this->read_layers.back()->update_model(this->post_prob,
	this->threads) ;
	}
	// create sequence layer and initialise the models from the post prob
	this->seq_layer = new SequenceLayer(seq_matrix,
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class) ;
	this->seq_layer->update_model(this->post_prob,
	this->threads) ;
	}

	EMJoint::EMJoint(std::vector<Matrix2D<int>>&& read_matrices,
	Matrix2D<int>&& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(read_matrices[0].get_nrow(),
	read_matrices[0].get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	n_layer(read_matrices.size()+1),
	loglikelihood_layer(this->n_layer,
	Matrix4D<double>(this->n_row,
	this->n_class,
	this->n_shift,
	this->n_flip,
	0.)),
	loglikelihood_max(this->n_layer,
	vector_d(this->n_row, 0.)),
	read_layers(),
	seq_layer(nullptr),
	consseq_layer(nullptr)
	{ // check data matrices and their dimensions
	for(const auto& matrix : read_matrices)
	{ if(matrix.get_nrow() != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! A read matrix row number is different than expected "
	"(%zu instead of %zu)!",
	matrix.get_nrow(), this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(matrix.get_ncol() != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! A read matrix column number is different than expected "
	"(%zu instead of %zu)!",
	matrix.get_ncol(), this->n_col) ;
	throw std::invalid_argument(msg) ;
	}
	}
	if(seq_matrix.get_nrow() != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! The sequence matrix row number is different than expected "
	"(%zu instead of %zu)!",
	seq_matrix.get_nrow(), this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(seq_matrix.get_ncol() != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! The sequence matrix column number is different than expected "
	"(%zu instead of %zu)!",
	seq_matrix.get_ncol(), this->n_col) ;
	throw std::invalid_argument(msg) ;
	}

	// initialise post prob randomly
	// getRandomGenerator(seed) ;
	this->set_post_prob_random(seed) ;

	// data and models
	// create read layer and initialise the models from the post prob
	for(auto& matrix : read_matrices)
	{
	// create the layer
	this->read_layers.push_back(new ReadLayer(std::move(matrix),
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class,
	this->threads)) ;
	this->read_layers.back()->update_model(this->post_prob,
	this->threads) ;
	}

	// create sequence layer and initialise the models from the post prob
	this->seq_layer = new SequenceLayer(std::move(seq_matrix),
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class) ;
	// intialise the models with the post prob
	this->seq_layer->update_model(this->post_prob,
	this->threads) ;
	}

	EMJoint::EMJoint(const std::vector<Matrix2D<int>>& read_matrices,
	const Matrix3D<double>& consseq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(read_matrices[0].get_nrow(),
	read_matrices[0].get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	n_layer(read_matrices.size()+1),
	loglikelihood_layer(this->n_layer,
	Matrix4D<double>(this->n_row,
	this->n_class,
	this->n_shift,
	this->n_flip,
	0.)),
	loglikelihood_max(this->n_layer,
	vector_d(this->n_row, 0.)),
	read_layers(),
	seq_layer(nullptr),
	consseq_layer(nullptr)
	{ // check data matrices and their dimensions
	for(const auto& matrix : read_matrices)
	{ if(matrix.get_nrow() != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! A read matrix row number is different than expected "
	"(%zu instead of %zu)!",
	matrix.get_nrow(), this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(matrix.get_ncol() != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! A read matrix column number is different than expected "
	"(%zu instead of %zu)!",
	matrix.get_ncol(), this->n_col) ;
	throw std::invalid_argument(msg) ;
	}
	}
	if(consseq_matrix.get_dim()[0] != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! The consensus sequence matrix row number is different than expected "
	"(%zu instead of %zu)!",
	consseq_matrix.get_dim()[0], this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(consseq_matrix.get_dim()[1] != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! The sequence matrix column number is different than expected "
	"(%zu instead of %zu)!",
	consseq_matrix.get_dim()[0], this->n_col) ;
	throw std::invalid_argument(msg) ;
	}

	// initialise post prob randomly
	// getRandomGenerator(seed) ;
	this->set_post_prob_random(seed) ;

	// data and models
	// create read layer and initialise the models from the post prob
	for(auto& matrix : read_matrices)
	{ // create the layer
	this->read_layers.push_back(new ReadLayer(matrix,
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class,
	this->threads)) ;
	this->read_layers.back()->update_model(this->post_prob,
	this->threads) ;
	}
	// create consensus sequence layer and initialise the models from the post prob
	{
	this->consseq_layer = new ConsensusSequenceLayer(consseq_matrix,
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class) ;
	this->consseq_layer->update_model(this->post_prob,
	this->threads) ;
	}
	}

	EMJoint::EMJoint(std::vector<Matrix2D<int>>&& read_matrices,
	Matrix3D<double>&& consseq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(read_matrices[0].get_nrow(),
	read_matrices[0].get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	n_layer(read_matrices.size()+1),
	loglikelihood_layer(this->n_layer,
	Matrix4D<double>(this->n_row,
	this->n_class,
	this->n_shift,
	this->n_flip,
	0.)),
	loglikelihood_max(this->n_layer,
	vector_d(this->n_row, 0.)),
	read_layers(),
	seq_layer(nullptr),
	consseq_layer(nullptr)
	{ // check data matrices and their dimensions
	for(const auto& matrix : read_matrices)
	{ if(matrix.get_nrow() != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! A read matrix row number is different than expected "
	"(%zu instead of %zu)!",
	matrix.get_nrow(), this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(matrix.get_ncol() != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! A read matrix column number is different than expected "
	"(%zu instead of %zu)!",
	matrix.get_ncol(), this->n_col) ;
	throw std::invalid_argument(msg) ;
	}
	}
	if(consseq_matrix.get_dim()[0] != this->n_row)
	{ char msg[4096] ;
	sprintf(msg, "Error! The consensus sequence matrix row number is different than expected "
	"(%zu instead of %zu)!",
	consseq_matrix.get_dim()[0], this->n_row) ;
	throw std::invalid_argument(msg) ;
	}
	else if(consseq_matrix.get_dim()[1] != this->n_col)
	{ char msg[4096] ;
	sprintf(msg, "Error! The sequence matrix column number is different than expected "
	"(%zu instead of %zu)!",
	consseq_matrix.get_dim()[0], this->n_col) ;
	throw std::invalid_argument(msg) ;
	}

	// initialise post prob randomly
	// getRandomGenerator(seed) ;
	this->set_post_prob_random(seed) ;

	// data and models
	// create read layer and initialise the models from the post prob
	for(auto& matrix : read_matrices)
	{ // create the layer
	this->read_layers.push_back(new ReadLayer(std::move(matrix),
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class,
	this->threads)) ;
	this->read_layers.back()->update_model(this->post_prob,
	this->threads) ;
	}
	// create consensus sequence layer and initialise the models from the post prob
	{
	this->consseq_layer = new ConsensusSequenceLayer(std::move(consseq_matrix),
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class) ;
	this->consseq_layer->update_model(this->post_prob,
	this->threads) ;
	}
	}

	EMJoint::~EMJoint()
	{ // join the threads in case
	// deleted by EMBase destructor
	if(this->threads != nullptr)
	{ this->threads->join() ;
	delete this->threads ;
	this->threads = nullptr ;
	}

	// read data and models
	for(auto& ptr : this->read_layers)
	{ if(ptr != nullptr)
	{ delete ptr ;
	ptr = nullptr ;
	}
	}
	// sequence data and models
	if(this->seq_layer != nullptr)
	{ delete seq_layer ;
	this->seq_layer = nullptr ;
	}
	// consensus sequence data and models
	if(this->consseq_layer != nullptr)
	{ delete this->consseq_layer ;
	this->consseq_layer = nullptr ;
	}
	}

	std::vector<Matrix3D<double>> EMJoint::get_read_models() const
	{ std::vector<Matrix3D<double>> models ;
	for(const auto& ptr : this->read_layers)
	{ models.push_back(ptr->get_model()) ; }
	return models ;
	}

	Matrix3D<double> EMJoint::get_sequence_models() const
	{ if(this->seq_layer != nullptr)
	{ return this->seq_layer->get_model() ; }
	return Matrix3D<double>() ;
	}

	Matrix3D<double> EMJoint::get_consensus_sequence_models() const
	{ if(this->consseq_layer != nullptr)
	{ return this->consseq_layer->get_model() ; }
	return Matrix3D<double>() ;
	}


	EMJoint::exit_codes EMJoint::classify()
	{ size_t bar_update_n = this->n_iter ;
	ConsoleProgressBar bar(std::cerr, bar_update_n, 60, "classifying") ;


	// optimize the partition
	for(size_t n_iter=0; n_iter<this->n_iter; n_iter++)
	{ // E-step
	this->compute_loglikelihood() ;
	this->compute_post_prob() ;
	// M-step
	this->compute_class_prob() ;
	this->update_models() ;
	this->center_post_state_prob() ;
	bar.update() ;
	}
	bar.update() ; std::cerr << std::endl ;

	return EMJoint::exit_codes::ITER_MAX ;
	}

	void EMJoint::compute_loglikelihood()
	{ // compute the loglikelihood for each layer
	size_t i = 0 ;
	for(auto& ptr : this->read_layers)
	{ ptr->compute_loglikelihoods(this->loglikelihood_layer[i],
	this->loglikelihood_max[i],
	this->threads) ;
	i++ ;
	}
	if(this->seq_layer != nullptr)
	{ this->seq_layer->compute_loglikelihoods(this->loglikelihood_layer[i],
	this->loglikelihood_max[i],
	this->threads) ;
	i++ ;
	}
	if(this->consseq_layer != nullptr)
	{ this->consseq_layer->compute_loglikelihoods(this->loglikelihood_layer[i],
	this->loglikelihood_max[i],
	this->threads) ;
	i++ ;
	}

	// sum the likelihood for each state, over all layers
	// and rescale the values
	// don't parallelize
	if(this->threads == nullptr)
	{ std::promise<bool> promise ;
	std::future<bool> future = promise.get_future() ;
	this->compute_loglikelihood_routine(0,
	this->n_row,
	promise) ;
	future.get() ;
	}
	// parallelize
	else
	{ size_t n_threads = this->threads->getNThread() ;

	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row,n_threads) ;

	// get promises and futures
	std::vector<std::promise<bool>> promises(n_threads) ;
	std::vector<std::future<bool>> futures(n_threads) ;
	for(size_t j=0; j<n_threads; j++)
	{ futures[j] = promises[j].get_future() ; }

	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t j=0; j<n_threads; j++)
	{ auto slice = slices[j] ;
	this->threads->addJob(std::move(
	std::bind(&EMJoint::compute_loglikelihood_routine,
	this,
	slice.first,
	slice.second,
	std::ref(promises[j])))) ;
	}
	// wait until all threads are done working
	for(auto& future : futures)
	{ future.get() ; }
	// -------------------------- threads stop ---------------------------
	}
	}

	-/*
	-void EMJoint::compute_loglikelihood_routine(size_t from,
	- size_t to,
	- std::promise<bool>& done)
	-{
	- // limite value range
	- for(size_t i=from; i<to; i++)
	- { for(size_t j=0; j<this->n_class; j++)
	- { for(size_t k=0; k<this->n_shift; k++)
	- { for(size_t l=0; l<this->n_flip; l++)
	- {
	- // reset
	- this->loglikelihood(i,j,k,l) = 0. ;
	- // sum
	- for(size_t m=0; m<this->n_layer; m++)
	- { this->loglikelihood(i,j,k,l) +=
	- (this->loglikelihood_layer[m](i,j,k,l) -
	- this->loglikelihood_max[m][i]) ;
	- }
	- }
	- }
	- }
	- }
	- done.set_value(true) ;
	-}
	-*/
	-
	void EMJoint::compute_loglikelihood_routine(size_t from,
	size_t to,
	std::promise<bool>& done)
	{ // the max likelihood found per row
	std::vector<double> rowmax(to-from, std::numeric_limits<double>::lowest()) ;

	// sum over layers
	for(size_t i=from, i_rowmax=0; i<to; i++, i_rowmax++)
	{ for(size_t j=0; j<this->n_class; j++)
	{ for(size_t k=0; k<this->n_shift; k++)
	{ for(size_t l=0; l<this->n_flip; l++)
	{
	// reset
	this->loglikelihood(i,j,k,l) = 0. ;
	// sum
	for(size_t m=0; m<this->n_layer; m++)
	{ // add rescaled layer value
	this->loglikelihood(i,j,k,l) +=
	(this->loglikelihood_layer[m](i,j,k,l) -
	this->loglikelihood_max[m][i]) ;
	}
	// keep track of max
	if(this->loglikelihood(i,j,k,l) > rowmax[i_rowmax])
	{ rowmax[i_rowmax] = this->loglikelihood(i,j,k,l) ; }
	}
	}
	}
	}

	// rescale
	for(size_t i=from, i_rowmax=0; i<to; i++, i_rowmax++)
	{ for(size_t j=0; j<this->n_class; j++)
	{ for(size_t k=0; k<this->n_shift; k++)
	{ for(size_t l=0; l<this->n_flip; l++)
	- { this->loglikelihood(i,j,k,l) -= rowmax[i_rowmax] ; }
	+ { // this->loglikelihood(i,j,k,l) -= rowmax[i_rowmax] ;
	+ this->loglikelihood(i,j,k,l) =
	+ std::max(this->loglikelihood(i,j,k,l) - rowmax[i_rowmax],
	+ ReadLayer::p_min_log) ;
	+ }
	}
	}
	}
	done.set_value(true) ;
	}


	void EMJoint::compute_post_prob()
	{ // don't parallelize
	if(this->threads == nullptr)
	{ std::promise<vector_d> promise ;
	std::future<vector_d> future = promise.get_future() ;
	this->compute_post_prob_routine(0, this->n_row, promise) ;
	// compute the sum of post prob and the per class sum of post prob
	// from the partial results computed on each slice
	this->post_prob_tot = 0. ;
	this->post_prob_colsum = future.get() ;
	for(const auto& prob : this->post_prob_colsum)
	{ this->post_prob_tot += prob ; }
	}
	// parallelize
	else
	{ size_t n_threads = this->threads->getNThread() ;

	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row,n_threads) ;

	// get promises and futures
	// the function run by the threads will compute
	// the partial sum per class of post_prob for the given slice
	// this should be used to compute the complete sum of post_prob
	// and the complete sum per class of post_prob
	std::vector<std::promise<vector_d>> promises(n_threads) ;
	std::vector<std::future<vector_d>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }

	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	this->threads->addJob(std::move(
	std::bind(&EMJoint::compute_post_prob_routine,
	this,
	slice.first,
	slice.second,
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	// compute the sum of post prob and the per class sum of post prob
	// from the partial results computed on each slice
	this->post_prob_tot = 0. ;
	this->post_prob_colsum = vector_d(this->n_class, 0.) ;
	for(auto& future : futures)
	{ auto probs = future.get() ;
	for(size_t i=0; i<this->n_class; i++)
	{ double prob = probs[i] ;
	this->post_prob_colsum[i] += prob ;
	this->post_prob_tot += prob ;
	}
	}
	// -------------------------- threads stop ---------------------------
	}
	}

	void EMJoint::compute_post_prob_routine(size_t from,
	size_t to,
	std::promise<vector_d>& post_prob_colsum)
	{ vector_d colsums(this->n_class, 0.) ;

	// post prob
	for(size_t i=from; i<to; i++)
	{ // reset row sum to 0
	this->post_prob_rowsum[i] = 0. ;
	for(size_t n_class=0; n_class<this->n_class; n_class++)
	{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
	{ for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
	{
	double p = exp(this->loglikelihood(i,n_class,n_shift,n_flip)) *
	this->post_state_prob(n_class,n_shift,n_flip) ;
	// double p = std::max(exp(this->loglikelihood(i,n_class,n_shift,n_flip)) *
	// this->post_state_prob(n_class,n_shift,n_flip),
	// ReadLayer::p_min) ;
	this->post_prob(i,n_class,n_shift,n_flip) = p ;
	this->post_prob_rowsum[i] += p ;
	}
	}
	}
	// normalize
	for(size_t n_class=0; n_class<this->n_class; n_class++)
	{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
	{ for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
	{
	double p = std::max(this->post_prob(i,n_class,n_shift,n_flip) /
	this->post_prob_rowsum[i],
	ReadLayer::p_min) ;
	// double p = this->post_prob(i,n_class,n_shift,n_flip) /
	// this->post_prob_rowsum[i] ;
	this->post_prob(i,n_class,n_shift,n_flip) = p ;
	colsums[n_class] += p ;
	}
	}
	}
	}
	post_prob_colsum.set_value(colsums) ;
	}

	void EMJoint::update_models()
	{ // read data and models
	for(auto& ptr : this->read_layers)
	{ ptr->update_model(this->post_prob,
	this->post_prob_colsum,
	this->threads) ;
	}
	// sequence data and models
	if(this->seq_layer != nullptr)
	{ this->seq_layer->update_model(this->post_prob,
	this->threads) ;
	}
	// consensus sequence data and models
	if(this->consseq_layer != nullptr)
	{ this->consseq_layer->update_model(this->post_prob,
	this->threads) ;
	}
	}
	diff --git a/src/Clustering/EMJoint.hpp b/src/Clustering/EMJoint.hpp
	index aeea97b..3d9cb80 100644
	--- a/src/Clustering/EMJoint.hpp
	+++ b/src/Clustering/EMJoint.hpp
	@@ -1,408 +1,424 @@
	#ifndef EMJOINT_HPP
	#define EMJOINT_HPP


	#include <EMBase.hpp>

	#include <vector>
	#include <string>

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <ReadLayer.hpp>
	#include <SequenceLayer.hpp>
	#include <ConsensusSequenceLayer.hpp>


	typedef std::vector<double> vector_d ;

	class EMJoint : public EMBase
	{
	public:
	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* with the given shifting and flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param seq_matrix a matrix containing the DNA
	* sequences for the regions of interest.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(const std::vector<Matrix2D<int>>& read_matrices,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* with the given shifting and flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param seq_matrix a matrix containing the DNA
	* sequences for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(std::vector<Matrix2D<int>>&& read_matrices,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* and sequences with the given shifting and
	* flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param seq_matrix a matrix containing the DNA
	* sequences for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(const std::vector<Matrix2D<int>>& read_matrices,
	const Matrix2D<int>& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* and sequences with the given shifting and
	* flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param seq_matrix a matrix containing the DNA
	* sequences for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(std::vector<Matrix2D<int>>&& read_matrices,
	Matrix2D<int>&& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* consensus sequences with the given shifting and
	* flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param consseq_matrix a matrix containing the DNA
	* consensus sequences for the regions of interest as
	* a probability matrix. Its dimensions are:
	* 1st the number of regions,
	* 2nd the region length.
	* 3rd 4 for A,C,G,T
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(const std::vector<Matrix2D<int>>& read_matrices,
	const Matrix3D<double>& consseq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* region according to all the given read densities
	* consensus sequences with the given shifting and
	* flipping freedom.
	* \param read_matrices a vector containing all
	* the different data densities (ChIP-seq or related
	* signal) for the regions of interest. The matrix
	* dimensions are as follow:
	* 1st the number of regions,
	* 2nd the region length.
	* \param consseq_matrix a matrix containing the DNA
	* consensus sequences for the regions of interest as
	* a probability matrix. Its dimensions are:
	* 1st the number of regions,
	* 2nd the region length.
	* 3rd 4 for A,C,G,T
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model
	* the background by setting all its parameters, at all
	* positions, to the background base probabilties (for
	* sequence models) and the mean number of counts (for
	* read signal models). Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMJoint(std::vector<Matrix2D<int>>&& read_matrices,
	Matrix3D<double>&& consseq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	EMJoint(const EMJoint& other) = delete ;

	/*!
	* \brief Destructor.
	*/
	virtual ~EMJoint() override ;

	/*!
	* \brief Returns all layer read models.
	* The models are in the same order
	* as the data were given to the
	* constructor.
	* \return a vector containing the
	* models.
	*/
	std::vector<Matrix3D<double>> get_read_models() const ;

	/*!
	* \brief Returns the sequence models.
	* \return a vector containing the
	* models.
	*/
	Matrix3D<double> get_sequence_models() const ;

	/*!
	* \brief Returns the consensus sequence
	* models.
	* \return a vector containing the
	* models.
	*/
	Matrix3D<double> get_consensus_sequence_models() const ;

	/*!
	* \brief Runs the sequence model optimization and
	* the data classification.
	* \return a code indicating how the optimization
	* ended.
	*/
	virtual EMJoint::exit_codes classify() override ;

	private:

	/*!
	* \brief Computes the data log likelihood given the
	* current models, for all layers and the joint
	* likelihood for each state (the sum of the layer
	* likelihoods for all layers, for a given state).
	+ * To avoid numerical issues when computing posterior
	+ * probabilities, the lowest possible value authorized
	+ * as log likelihood is ReadLayer::p_min_log. Any
	+ * value below is replaced by this one.
	*/
	virtual void compute_loglikelihood() override ;

	/*!
	* \brief This is a routine of compute_loglikelihood() that
	* computes the joint loglikelihood by summing the
	* individual loglikelihood obtained from each data layer.
	* At the same time, this method rescales the loglikelihood
	* values by substacting to each value the maximum
	* loglikelihood value found in the same data row,
	* for each layer.
	+ * To avoid numerical issues when computing posterior
	+ * probabilities, the lowest possible value authorized
	+ * as log likelihood is ReadLayer::p_min_log. Any
	+ * value below is replaced by this one.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done a promise to fill when the method
	* is done.
	*/
	void compute_loglikelihood_routine(size_t from,
	size_t to,
	std::promise<bool>& done) ;

	/*!
	* \brief Computes the data posterior probabilties.
	+ * To avoid numerical issues the lowest possible
	+ * value authorized as posterior probability is
	+ * ReadLayer::p_min. Any value below is replaced
	+ * by this one.
	*/
	virtual void compute_post_prob() override ;

	/*!
	* \brief The routine that effectively computes
	* the posterior probabilties.
	+ * To avoid numerical issues the lowest possible
	+ * value authorized as posterior probability is
	+ * ReadLayer::p_min. Any value below is replaced
	+ * by this one.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done the partial column (over the classes)
	* sum of posterior probabilities. If several routines
	* are running together, the colsums are retrieved by
	* summing up the vectors together.
	*/
	void compute_post_prob_routine(size_t from,
	size_t to,
	std::promise<vector_d>& post_prob_colsum) ;

	/*!
	* \brief Update the data models for all layers, given
	* the current posterior and class probabilities.
	*/
	virtual void update_models() override ;

	/*!
	* \brief the number of data layers.
	*/
	size_t n_layer ;
	/*!
	* \brief the log likelihood buffers for each individual
	* layer (one element per layer).
	*/
	std::vector<Matrix4D<double>> loglikelihood_layer ;
	/*!
	* \brief the max loglikelihood value for
	* each each data layer (1st dimension)
	* and each data row of the given layer
	* (2nd dimension).
	*/
	std::vector<vector_d> loglikelihood_max ;

	/*!
	* \brief A vector containing the pointers
	* to the objects managing all the read
	* data and models.
	*/
	std::vector<ReadLayer*> read_layers ;
	/*!
	* \brief A pointer to the object managing
	* the sequence data and their models.
	*/
	SequenceLayer* seq_layer ;
	/*!
	* \brief A pointer to the object managing
	* the consensus sequences data and their
	* models.
	*/
	ConsensusSequenceLayer* consseq_layer ;
	} ;

	#endif // EMJOINT_HPP
	diff --git a/src/Clustering/EMRead.cpp b/src/Clustering/EMRead.cpp
	index a8cb150..580f10f 100644
	--- a/src/Clustering/EMRead.cpp
	+++ b/src/Clustering/EMRead.cpp
	@@ -1,314 +1,315 @@
	#include <EMRead.hpp>

	#include <string>
	#include <vector>
	#include <future> // std::promise, std::future
	#include <utility> // std::pair, std::move()
	#include <functional> // std::bind(), std::ref()
	#include <cmath> // exp()

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <ReadLayer.hpp> // ReadLayer
	#include <RandomNumberGenerator.hpp> // getRandomNumberGenerator()
	#include <ConsoleProgressBar.hpp> // ConsoleProgressBar
	#include <ThreadPool.hpp> // ThreadPool


	EMRead::EMRead(const Matrix2D<int>& read_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(read_matrix.get_nrow(),
	read_matrix.get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	loglikelihood_max(n_row, 0.),
	read_layer(nullptr)
	{ this->loglikelihood_max = vector_d(n_row, 0.) ;

	// initialise post prob randomly
	this->set_post_prob_random(seed) ;
	// data and models
	this->read_layer = new ReadLayer(read_matrix,
	this->n_class,
	this->n_shift,
	flip,
	bckg_class,
	this->threads) ;
	// intialise the models with the post prob
	this->read_layer->update_model(this->post_prob,
	this->threads) ;
	}

	EMRead::EMRead(Matrix2D<int>&& read_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(read_matrix.get_nrow(),
	read_matrix.get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	loglikelihood_max(n_row, 0.),
	read_layer(nullptr)
	{ this->loglikelihood_max = vector_d(n_row, 0.) ;

	// initialise post prob randomly
	this->set_post_prob_random(seed) ;

	// data and models
	this->read_layer = new ReadLayer(std::move(read_matrix),
	this->n_class,
	this->n_shift,
	flip,
	bckg_class,
	this->threads) ;
	// intialise the models with the post prob
	this->read_layer->update_model(this->post_prob,
	this->threads) ;
	}

	EMRead::~EMRead()
	{ if(this->read_layer!= nullptr)
	{ delete this->read_layer ;
	this->read_layer = nullptr ;
	}
	if(this->threads != nullptr)
	{ this->threads->join() ;
	delete this->threads ;
	this->threads = nullptr ;
	}
	}

	Matrix3D<double> EMRead::get_read_models() const
	{ return read_layer->get_model() ; }

	EMRead::exit_codes EMRead::classify()
	{
	size_t bar_update_n = this->n_iter ;
	ConsoleProgressBar bar(std::cerr, bar_update_n, 60, "classifying") ;

	// optimize the partition
	for(size_t n_iter=0; n_iter<this->n_iter; n_iter++)
	{
	// E-step
	this->compute_loglikelihood() ;
	// std::cerr << this->post_prob_rowsum << std::endl ;
	// std::cerr << this->post_prob_colsum << std::endl ;
	this->compute_post_prob() ;
	// M-step
	// std::cerr << this->post_prob_rowsum << std::endl ;
	// std::cerr << this->post_prob_colsum << std::endl ;
	this->compute_class_prob() ;
	this->update_models() ;
	this->center_post_state_prob() ;

	bar.update() ;
	}
	bar.update() ; std::cerr << std::endl ;
	return EMRead::exit_codes::ITER_MAX ;
	}

	void EMRead::compute_loglikelihood()
	{ // compute the loglikelihood
	this->read_layer->compute_loglikelihoods(this->loglikelihood,
	this->loglikelihood_max,
	this->threads) ;

	/*
	// rescale the values
	for(size_t i=0; i<this->n_row; i++)
	{ for(size_t j=0; j<this->n_class; j++)
	{ for(size_t k=0; k<this->n_shift; k++)
	{ for(size_t l=0; l<this->n_flip; l++)
	{ this->loglikelihood(i,j,k,l) =
	(this->loglikelihood(i,j,k,l) -
	this->loglikelihood_max[i]) ;
	}
	}
	}
	}
	*/

	// rescale the values
	// don't parallelize
	if(this->threads == nullptr)
	{ std::promise<bool> promise ;
	std::future<bool> future = promise.get_future() ;
	this->compute_loglikelihood_routine(0,
	this->n_row,
	promise) ;
	future.get() ;
	}
	// parallelize
	else
	{ size_t n_threads = this->threads->getNThread() ;

	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row,n_threads) ;

	// get promises and futures
	std::vector<std::promise<bool>> promises(n_threads) ;
	std::vector<std::future<bool>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }

	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	this->threads->addJob(std::move(
	std::bind(&EMRead::compute_loglikelihood_routine,
	this,
	slice.first,
	slice.second,
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	for(auto& future : futures)
	{ future.get() ; }
	// -------------------------- threads stop ---------------------------
	}
	}

	void EMRead::compute_loglikelihood_routine(size_t from,
	size_t to,
	std::promise<bool>& done)
	{
	// rescale the values
	for(size_t i=from; i<to; i++)
	{ for(size_t j=0; j<this->n_class; j++)
	{ for(size_t k=0; k<this->n_shift; k++)
	{ for(size_t l=0; l<this->n_flip; l++)
	{ this->loglikelihood(i,j,k,l) =
	- (this->loglikelihood(i,j,k,l) -
	- this->loglikelihood_max[i]) ;
	+ std::max(this->loglikelihood(i,j,k,l) -
	+ this->loglikelihood_max[i],
	+ ReadLayer::p_min_log) ;
	}
	}
	}
	}
	done.set_value(true) ;
	}

	void EMRead::compute_post_prob()
	{ // don't parallelize
	if(this->threads == nullptr)
	{ std::promise<vector_d> promise ;
	std::future<vector_d> future = promise.get_future() ;
	this->compute_post_prob_routine(0, this->n_row, promise) ;
	// compute the sum of post prob and the per class sum of post prob
	// from the partial results computed on each slice
	this->post_prob_tot = 0. ;
	this->post_prob_colsum = future.get() ;
	for(const auto& prob : this->post_prob_colsum)
	{ this->post_prob_tot += prob ; }
	}
	// parallelize
	else
	{ size_t n_threads = this->threads->getNThread() ;

	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row,n_threads) ;

	// get promises and futures
	// the function run by the threads will compute
	// the partial sum per class of post_prob for the given slice
	// this should be used to compute the complete sum of post_prob
	// and the complete sum per class of post_prob
	std::vector<std::promise<vector_d>> promises(n_threads) ;
	std::vector<std::future<vector_d>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }

	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	this->threads->addJob(std::move(
	std::bind(&EMRead::compute_post_prob_routine,
	this,
	slice.first,
	slice.second,
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	// compute the sum of post prob and the per class sum of post prob
	// from the partial results computed on each slice
	this->post_prob_tot = 0. ;
	this->post_prob_colsum = vector_d(this->n_class, 0.) ;
	for(auto& future : futures)
	{ auto probs = future.get() ;
	for(size_t i=0; i<this->n_class; i++)
	{ double prob = probs[i] ;
	this->post_prob_colsum[i] += prob ;
	this->post_prob_tot += prob ;
	}
	}
	// -------------------------- threads stop ---------------------------
	}
	}


	void EMRead::compute_post_prob_routine(size_t from,
	size_t to,
	std::promise<vector_d>& post_prob_colsum)
	{ vector_d colsums(this->n_class, 0.) ;

	// reset grand total
	// this->post_prob_tot = 0 ;
	// this->post_prob_colsum = vector_d(n_class, 0) ;

	// post prob
	for(size_t i=from; i<to; i++)
	{ // reset row sum to 0
	this->post_prob_rowsum[i] = 0. ;
	for(size_t n_class=0; n_class<this->n_class; n_class++)
	{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
	{ for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
	{
	double p = exp(this->loglikelihood(i,n_class,n_shift,n_flip)) *
	this->post_state_prob(n_class,n_shift,n_flip) ;
	this->post_prob(i,n_class,n_shift,n_flip) = p ;
	this->post_prob_rowsum[i] += p ;
	}
	}
	}

	// normalize
	for(size_t n_class=0; n_class<this->n_class; n_class++)
	{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
	{ for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
	{ // avoid p=0. by rounding errors
	double p = std::max(this->post_prob(i,n_class,n_shift,n_flip) /
	this->post_prob_rowsum[i],
	ReadLayer::p_min) ;
	this->post_prob(i,n_class,n_shift,n_flip) = p ;
	colsums[n_class] += p ;
	}
	}
	}
	}
	post_prob_colsum.set_value(colsums) ;
	}

	void EMRead::update_models()
	{ this->read_layer->update_model(this->post_prob,
	this->post_prob_colsum,
	this->threads) ;
	}
	diff --git a/src/Clustering/EMRead.hpp b/src/Clustering/EMRead.hpp
	index 0c49b6a..e3e727a 100644
	--- a/src/Clustering/EMRead.hpp
	+++ b/src/Clustering/EMRead.hpp
	@@ -1,173 +1,188 @@
	#ifndef EMREAD_HPP
	#define EMREAD_HPP

	#include <EMBase.hpp>

	#include <vector>
	#include <string>
	#include <future> // std::promise

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <ReadLayer.hpp>


	typedef std::vector<double> vector_d ;


	class EMRead : public EMBase
	{
	public:
	/*!
	* \brief Constructs an object to partition the
	* region (rows) according to the shape of the signal
	* with the given shifting and flipping freedom.
	* \param read_matrix a matrix containing the read
	* densitiy (ChIP-seq or related signal) for the
	* regions of interest.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model the
	* background by setting all its parameters, at all
	* positions, to the mean number of counts. Since
	* the background is constant, this class will never
	* be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMRead(const Matrix2D<int>& read_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* region (rows) according to the shape of the signal
	* with the given shifting and flipping freedom.
	* \param read_matrix a matrix containing the read
	* densitiy (ChIP-seq or related signal) for the
	* regions of interest.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model the
	* background by setting all its parameters, at all
	* positions, to the mean number of counts. Since
	* the background is constant, this class will never
	* be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMRead(Matrix2D<int>&& read_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	EMRead(const EMRead& other) = delete ;

	/*!
	* \brief Destructor.
	*/
	virtual ~EMRead() override ;

	/*!
	* \brief Returns the class read signal model.
	* \return the class read signal model.
	*/
	Matrix3D<double> get_read_models() const ;

	/*!
	* \brief Runs the read signal model optimization and
	* the data classification.
	* \return a code indicating how the optimization
	* ended.
	*/
	virtual EMRead::exit_codes classify() override ;

	private:

	/*!
	* \brief Computes the data log likelihood given the
	* current models, for all layers and the joint
	* likelihood for each state (the sum of the layer
	* likelihoods for all layers, for a given state).
	+ * To avoid numerical issues when computing posterior
	+ * probabilities, the lowest possible value authorized
	+ * as log likelihood is ReadLayer::p_min_log. Any
	+ * value below is replaced by this one.
	*/
	virtual void compute_loglikelihood() override ;

	/*!
	* \brief This is a routine of compute_loglikelihood().
	* This method rescales the loglikelihood values by
	* substacting to each value the maximum loglikelihood
	* value found in the same data row.
	- * This method
	+ * To avoid numerical issues when computing posterior
	+ * probabilities, the lowest possible value authorized
	+ * as log likelihood is ReadLayer::p_min_log. Any
	+ * value below is replaced by this one.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done a promise to fill when the method
	* is done.
	*/
	void compute_loglikelihood_routine(size_t from,
	size_t to,
	std::promise<bool>& done) ;

	/*!
	* \brief Computes the data posterior probabilties.
	+ * To avoid numerical issues the lowest possible
	+ * value authorized as posterior probability is
	+ * ReadLayer::p_min. Any value below is replaced
	+ * by this one.
	*/
	virtual void compute_post_prob() override ;

	/*!
	* \brief The routine that effectively computes
	* the posterior probabilties.
	+ * To avoid numerical issues the lowest possible
	+ * value authorized as posterior probability is
	+ * ReadLayer::p_min. Any value below is replaced
	+ * by this one.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done the partial column (over the classes)
	* sum of posterior probabilities. If several routines
	* are running together, the colsums are retrieved by
	* summing up the vectors together.
	*/
	void compute_post_prob_routine(size_t from,
	size_t to,
	std::promise<vector_d>& post_prob_colsum) ;

	/*!
	* \brief Update the data models for all layers, given
	* the current posterior and class probabilities.
	*/
	virtual void update_models() override ;

	/*!
	* \brief the max loglikelihood value for
	* each data row.
	*/
	std::vector<double> loglikelihood_max ;
	/*!
	* \brief A pointer to the object managing
	* the data and their model.
	*/
	ReadLayer* read_layer ;

	} ;

	#endif // EMREAD_HPP
	diff --git a/src/Clustering/EMSequence.cpp b/src/Clustering/EMSequence.cpp
	index 3f5bebb..4844f2c 100644
	--- a/src/Clustering/EMSequence.cpp
	+++ b/src/Clustering/EMSequence.cpp
	@@ -1,357 +1,358 @@
	#include <EMSequence.hpp>

	#include <string>
	#include <vector>
	#include <future> // std::promise, std::future
	#include <utility> // std::pair, std::move()
	#include <functional> // std::bind(), std::ref()
	#include <cmath> // exp()

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <SequenceLayer.hpp> // SequenceLayer
	#include <RandomNumberGenerator.hpp> // getRandomNumberGenerator()
	#include <ConsoleProgressBar.hpp> // ConsoleProgressBar
	#include <ThreadPool.hpp> // ThreadPool


	EMSequence::EMSequence(const Matrix2D<int>& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(seq_matrix.get_nrow(),
	seq_matrix.get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	loglikelihood_max(n_row, 0.),
	seq_layer(nullptr)
	{
	this->loglikelihood_max = vector_d(n_row, 0.) ;

	// initialise post prob randomly
	// getRandomGenerator(seed) ;
	this->set_post_prob_random(seed) ;

	// data and models
	this->seq_layer = new SequenceLayer(seq_matrix,
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class) ;

	// intialise the models with the post prob
	this->seq_layer->update_model(this->post_prob,
	this->threads) ;
	}


	EMSequence::EMSequence(Matrix2D<int>&& seq_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_threads)
	: EMBase(seq_matrix.get_nrow(),
	seq_matrix.get_ncol(),
	n_class,
	n_iter,
	n_shift,
	flip,
	n_threads),
	loglikelihood_max(n_row, 0.),
	seq_layer(nullptr)
	{
	this->loglikelihood_max = vector_d(n_row, 0.) ;

	// initialise post prob randomly
	// getRandomGenerator(seed) ;
	this->set_post_prob_random(seed) ;

	// data and models
	this->seq_layer = new SequenceLayer(std::move(seq_matrix),
	this->n_class,
	this->n_shift,
	this->flip,
	bckg_class) ;

	// intialise the models with the post prob
	this->seq_layer->update_model(this->post_prob,
	this->threads) ;
	}


	EMSequence::EMSequence(const Matrix2D<int>& seq_matrix,
	const Matrix3D<double>& motifs,
	size_t n_iter,
	bool flip,
	bool bckg_class,
	size_t n_threads)
	: EMBase(seq_matrix.get_nrow(),
	seq_matrix.get_ncol(),
	motifs.get_dim()[0],
	n_iter,
	seq_matrix.get_ncol() - motifs.get_dim()[1] + 1,
	flip,
	n_threads),
	loglikelihood_max(n_row, 0.),
	seq_layer(nullptr)
	{

	this->loglikelihood_max = vector_d(n_row, 0.) ;

	// data and models
	// background motif (if any) is the last of the given motifs
	this->seq_layer = new SequenceLayer(seq_matrix,
	motifs,
	this->flip,
	bckg_class) ;

	// intialise the class prob uniformly
	this->set_state_prob_uniform() ;
	}

	EMSequence::EMSequence(Matrix2D<int>&& seq_matrix,
	Matrix3D<double>&& motifs,
	size_t n_iter,
	bool flip,
	bool bckg_class,
	size_t n_threads)
	: EMBase(seq_matrix.get_nrow(),
	seq_matrix.get_ncol(),
	motifs.get_dim()[0],
	n_iter,
	seq_matrix.get_ncol() - motifs.get_dim()[1] + 1,
	flip,
	n_threads),
	loglikelihood_max(n_row, 0.),
	seq_layer(nullptr)
	{

	this->loglikelihood_max = vector_d(n_row, 0.) ;

	// data and models
	// background motif (if any) is the last of the given motifs
	this->seq_layer = new SequenceLayer(std::move(seq_matrix),
	std::move(motifs),
	this->flip,
	bckg_class) ;

	// intialise the class prob uniformly
	this->set_state_prob_uniform() ;
	}


	EMSequence::~EMSequence()
	{ if(this->seq_layer != nullptr)
	{ delete this->seq_layer ;
	this->seq_layer = nullptr ;
	}
	if(this->threads != nullptr)
	{ this->threads->join() ;
	delete this->threads ;
	this->threads = nullptr ;
	}
	}

	Matrix3D<double> EMSequence::get_sequence_models() const
	{ return seq_layer->get_model() ; }

	EMSequence::exit_codes EMSequence::classify()
	{ size_t bar_update_n = this->n_iter ;
	ConsoleProgressBar bar(std::cerr, bar_update_n, 60, "classifying") ;

	// optimize the partition
	for(size_t n_iter=0; n_iter<this->n_iter; n_iter++)
	{ // E-step
	this->compute_loglikelihood() ;
	this->compute_post_prob() ;
	// M-step
	this->compute_class_prob() ;
	this->update_models() ;
	this->center_post_state_prob() ;
	bar.update() ;
	}
	bar.update() ; std::cerr << std::endl ;
	return EMSequence::exit_codes::ITER_MAX ;
	}

	void EMSequence::compute_loglikelihood()
	{ // compute the loglikelihood
	this->seq_layer->compute_loglikelihoods(this->loglikelihood,
	this->loglikelihood_max,
	this->threads) ;
	// rescale the values
	// don't parallelize
	if(this->threads == nullptr)
	{ std::promise<bool> promise ;
	std::future<bool> future = promise.get_future() ;
	this->compute_loglikelihood_routine(0,
	this->n_row,
	promise) ;
	future.get() ;
	}
	// parallelize
	else
	{ size_t n_threads = this->threads->getNThread() ;

	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row,n_threads) ;

	// get promises and futures
	std::vector<std::promise<bool>> promises(n_threads) ;
	std::vector<std::future<bool>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }

	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	this->threads->addJob(std::move(
	std::bind(&EMSequence::compute_loglikelihood_routine,
	this,
	slice.first,
	slice.second,
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	for(auto& future : futures)
	{ future.get() ; }
	// -------------------------- threads stop ---------------------------
	}
	}

	void EMSequence::compute_loglikelihood_routine(size_t from,
	size_t to,
	std::promise<bool>& done)
	{
	// rescale the values
	for(size_t i=from; i<to; i++)
	{ for(size_t j=0; j<this->n_class; j++)
	{ for(size_t k=0; k<this->n_shift; k++)
	{ for(size_t l=0; l<this->n_flip; l++)
	{ this->loglikelihood(i,j,k,l) =
	- (this->loglikelihood(i,j,k,l) -
	- this->loglikelihood_max[i]) ;
	+ std::max(this->loglikelihood(i,j,k,l) -
	+ this->loglikelihood_max[i],
	+ SequenceLayer::p_min_log) ;
	}
	}
	}
	}
	done.set_value(true) ;
	}

	void EMSequence::compute_post_prob()
	{ // don't parallelize
	if(this->threads == nullptr)
	{ std::promise<vector_d> promise ;
	std::future<vector_d> future = promise.get_future() ;
	this->compute_post_prob_routine(0, this->n_row, promise) ;
	// compute the sum of post prob and the per class sum of post prob
	// from the partial results computed on each slice
	this->post_prob_tot = 0. ;
	this->post_prob_colsum = future.get() ;
	for(const auto& prob : this->post_prob_colsum)
	{ this->post_prob_tot += prob ; }
	}
	// parallelize
	else
	{ size_t n_threads = this->threads->getNThread() ;

	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row,n_threads) ;

	// get promises and futures
	// the function run by the threads will compute
	// the partial sum per class of post_prob for the given slice
	// this should be used to compute the complete sum of post_prob
	// and the complete sum per class of post_prob
	std::vector<std::promise<vector_d>> promises(n_threads) ;
	std::vector<std::future<vector_d>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }

	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	this->threads->addJob(std::move(
	std::bind(&EMSequence::compute_post_prob_routine,
	this,
	slice.first,
	slice.second,
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	// compute the sum of post prob and the per class sum of post prob
	// from the partial results computed on each slice
	this->post_prob_tot = 0. ;
	this->post_prob_colsum = vector_d(this->n_class, 0.) ;
	for(auto& future : futures)
	{ auto probs = future.get() ;
	for(size_t i=0; i<this->n_class; i++)
	{ double prob = probs[i] ;
	this->post_prob_colsum[i] += prob ;
	this->post_prob_tot += prob ;
	}
	}
	// -------------------------- threads stop ---------------------------
	}
	}


	void EMSequence::compute_post_prob_routine(size_t from,
	size_t to,
	std::promise<vector_d>& post_prob_colsum)
	{ vector_d colsums(this->n_class, 0.) ;

	// reset grand total
	// this->post_prob_tot = 0 ;
	// this->post_prob_colsum = vector_d(n_class, 0) ;

	// post prob
	for(size_t i=from; i<to; i++)
	{ // reset row sum to 0
	this->post_prob_rowsum[i] = 0. ;
	for(size_t n_class=0; n_class<this->n_class; n_class++)
	{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
	{ for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
	{
	double p = exp(this->loglikelihood(i,n_class,n_shift,n_flip)) *
	this->post_state_prob(n_class,n_shift,n_flip) ;
	this->post_prob(i,n_class,n_shift,n_flip) = p ;
	this->post_prob_rowsum[i] += p ;
	}
	}
	}
	// normalize
	for(size_t n_class=0; n_class<this->n_class; n_class++)
	{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
	{ for(size_t n_flip=0; n_flip<this->n_flip; n_flip++)
	{
	double p = std::max(this->post_prob(i,n_class,n_shift,n_flip) /
	this->post_prob_rowsum[i],
	SequenceLayer::p_min) ;
	this->post_prob(i,n_class,n_shift,n_flip) = p ;
	colsums[n_class] += p ;
	}
	}
	}
	}
	post_prob_colsum.set_value(colsums) ;
	}

	void EMSequence::update_models()
	{ this->seq_layer->update_model(this->post_prob,
	this->threads) ;
	}
	diff --git a/src/Clustering/EMSequence.hpp b/src/Clustering/EMSequence.hpp
	index 622532b..537c285 100644
	--- a/src/Clustering/EMSequence.hpp
	+++ b/src/Clustering/EMSequence.hpp
	@@ -1,240 +1,255 @@
	#ifndef EMSEQUENCE_HPP
	#define EMSEQUENCE_HPP

	#include <EMBase.hpp>

	#include <vector>
	#include <string>
	#include <future> // std::promise

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <SequenceLayer.hpp>


	typedef std::vector<double> vector_d ;


	class EMSequence : public EMBase
	{
	public:
	/*!
	* \brief Constructs an object to partition the
	* given sequences (rows) according to their motif
	* content.
	* The sequences models are initialised randomly.
	* \param sequence_matrix a matrix containing the sequences
	* of interest.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model the background
	* by setting all its parameters, at all positions, to the
	* background base probabilties. Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMSequence(const Matrix2D<int>& sequence_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;
	/*!
	* \brief Constructs an object to partition the
	* given sequences (rows) according to their motif
	* content.
	* The sequences models are initialised randomly.
	* \param sequence_matrix a matrix containing the sequences
	* of interest.
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param n_shift the number of shift states allowed.
	* \param flip whether flipping is allowed.
	* \param bckg_class the last class is used to model the background
	* by setting all its parameters, at all positions, to the
	* background base probabilties. Since the background is constant,
	* this class will never be updated.
	* \param seed a seed to initialise the random number
	* generator.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMSequence(Matrix2D<int>&& sequence_matrix,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed="",
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* given sequences (rows) according to their motif
	* content.
	* The sequences class models are initialised using
	* the given motifs. The class probabilities are
	* initialised uniformlly.
	* The shifting freedom is set to (data number
	* of columns) - (the model 2nd dimension)
	* + 1.
	* \param sequence_matrix a matrix containing the sequences
	* of interest.
	* \param motifs a matrix containing the different initial
	* class models with the following dimensions :
	* dim1 the number of classes
	* dim2 the model length
	* dim3 4 for A,C,G,T
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param flip whether flipping is allowed.
	* \param bckg_class indicates that the last class in the
	* given motifs is used to model the background and it
	* should never be updated.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMSequence(const Matrix2D<int>& sequence_matrix,
	const Matrix3D<double>& motifs,
	size_t n_iter,
	bool flip,
	bool bckg_class,
	size_t n_threads=0) ;

	/*!
	* \brief Constructs an object to partition the
	* given sequences (rows) according to their motif
	* content.
	* The sequences class models are initialised using
	* the given motifs. The class probabilities are
	* initialised uniformlly.
	* The shifting freedom is set to (data number
	* of columns) - (the model 2nd dimension)
	* + 1.
	* \param sequence_matrix a matrix containing the sequences
	* of interest.
	* \param motifs a matrix containing the different initial
	* class models with the following dimensions :
	* dim1 the number of classes
	* dim2 the model length
	* dim3 4 for A,C,G,T
	* \param n_class the number of region classes
	* to search.
	* \param n_iter the number of optimization iterations.
	* \param flip whether flipping is allowed.
	* \param bckg_class indicates that the last class in the
	* given motifs is used to model the background and it
	* should never be updated.
	* \param n_threads the number of parallel threads
	* to run the computations. 0 means no parallel
	* computing, everything is run on the main thread.
	*/
	EMSequence(Matrix2D<int>&& sequence_matrix,
	Matrix3D<double>&& motifs,
	size_t n_iter,
	bool flip,
	bool bckg_class,
	size_t n_threads=0) ;

	EMSequence(const EMSequence& other) = delete ;

	/*!
	* \brief Destructor.
	*/
	virtual ~EMSequence() override ;

	/*!
	* \brief Returns the class sequence model.
	* \return the class sequence model.
	*/
	Matrix3D<double> get_sequence_models() const ;

	/*!
	* \brief Runs the sequence model optimization and
	* the data classification.
	* \return a code indicating how the optimization
	* ended.
	*/
	virtual EMSequence::exit_codes classify() override ;

	private:

	/*!
	* \brief Computes the data log likelihood given the
	* current models, for all layers and the joint
	* likelihood for each state (the sum of the layer
	* likelihoods for all layers, for a given state).
	+ * To avoid numerical issues when computing posterior
	+ * probabilities, the lowest possible value authorized
	+ * as log likelihood is SequenceLayer::p_min_log. Any
	+ * value below is replaced by this one.
	*/
	virtual void compute_loglikelihood() override ;

	/*!
	* \brief This is a routine of compute_loglikelihood().
	* This method rescales the loglikelihood values by
	* substacting to each value the maximum loglikelihood
	* value found in the same data row.
	- * This method
	+ * To avoid numerical issues when computing posterior
	+ * probabilities, the lowest possible value authorized
	+ * as log likelihood is ReadLayer::p_min_log. Any
	+ * value below is replaced by this one.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done a promise to fill when the method
	* is done.
	*/
	void compute_loglikelihood_routine(size_t from,
	size_t to,
	std::promise<bool>& done) ;

	/*!
	* \brief Computes the data posterior probabilties.
	+ * To avoid numerical issues the lowest possible
	+ * value authorized as posterior probability is
	+ * SequenceLayer::p_min. Any value below is replaced
	+ * by this one.
	*/
	virtual void compute_post_prob() override ;

	/*!
	* \brief The routine that effectively computes
	* the posterior probabilties.
	+ * To avoid numerical issues the lowest possible
	+ * value authorized as posterior probability is
	+ * SequenceLayer::p_min. Any value below is replaced
	+ * by this one.
	* \param from the index of the first row
	* in the data to consider.
	* \param to the index of the past last row
	* in the data to consider.
	* \param done the partial column (over the classes)
	* sum of posterior probabilities. If several routines
	* are running together, the colsums are retrieved by
	* summing up the vectors together.
	*/
	void compute_post_prob_routine(size_t from,
	size_t to,
	std::promise<vector_d>& post_prob_colsum) ;

	/*!
	* \brief Update the data models for all layers, given
	* the current posterior and class probabilities.
	*/
	virtual void update_models() override ;

	/*!
	* \brief the max loglikelihood value for
	* each data row.
	*/
	std::vector<double> loglikelihood_max ;
	/*!
	* \brief A pointer to the object managing
	* the data and their model.
	*/
	SequenceLayer* seq_layer ;

	} ;

	#endif // EMSEQUENCE_HPP
	diff --git a/src/Clustering/ReadLayer.cpp b/src/Clustering/ReadLayer.cpp
	index 3c0e70d..9370245 100644
	--- a/src/Clustering/ReadLayer.cpp
	+++ b/src/Clustering/ReadLayer.cpp
	@@ -1,492 +1,496 @@
	#include <ReadLayer.hpp>

	#include <stdexcept> // std::invalid_argument
	#include <limits> // numeric_limits
	#include <cmath> // log(), exp(), pow()
	#include <vector>
	#include <future> // std::promise, std::future
	#include <utility> // std::pair, std::move()
	#include <functional> // std::bind(), std::ref()

	#include <Statistics.hpp> // poisson_pmf()
	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <ThreadPool.hpp>

	#include <iostream>

	typedef std::vector<double> vector_d ;

	ReadLayer::ReadLayer(const Matrix2D<int>& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	ThreadPool* threads)
	: Data2DLayer(data, n_class, n_shift, flip, bckg_class),
	window_means(n_row, n_shift, 0.)
	{ this->n_category = 1 ;
	// initialise the empty model
	this->model = Matrix3D<double>(this->n_class,
	this->l_model,
	this->n_category,
	0) ;
	// background class
	if(this->bckg_class)
	{ this->create_bckg_class() ; }

	// compute window means
	this->compute_window_means(threads) ;
	}

	ReadLayer::ReadLayer(Matrix2D<int>&& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	ThreadPool* threads)
	: Data2DLayer(std::move(data), n_class, n_shift, flip, bckg_class),
	window_means(n_row, n_shift, 0.)
	{ this->n_category = 1 ;
	// initialise the empty model
	this->model = Matrix3D<double>(this->n_class,
	this->l_model,
	this->n_category,
	0) ;

	// background class
	if(this->bckg_class)
	{ this->create_bckg_class() ; }

	// compute window means
	this->compute_window_means(threads) ;
	}

	ReadLayer::ReadLayer(const Matrix2D<int>& data,
	const Matrix3D<double>& model,
	bool flip,
	bool bckg_class,
	ThreadPool* threads)
	: Data2DLayer(data, model, flip, bckg_class),
	window_means(n_row, n_shift, 0.)
	{ // check that the model only has one category
	if(this->n_category > 1)
	{ char msg[4096] ;
	sprintf(msg,
	"Error! model is expected to have length 1 on "
	"3rd dimension, not %zu",
	this->n_category) ;
	throw std::invalid_argument(msg) ;
	}
	// compute window means
	this->compute_window_means(threads) ;
	}

	ReadLayer::ReadLayer(Matrix2D<int>&& data,
	Matrix3D<double>&& model,
	bool flip,
	bool bckg_class,
	ThreadPool* threads)
	: Data2DLayer(std::move(data), std::move(model), flip, bckg_class),
	window_means(n_row, n_shift, 0.)
	{ // check that the model only has one category
	if(this->n_category > 1)
	{ char msg[4096] ;
	sprintf(msg,
	"Error! model is expected to have length 1 on "
	"3rd dimension, not %zu",
	this->n_category) ;
	throw std::invalid_argument(msg) ;
	}
	// compute window means
	this->compute_window_means(threads) ;
	}


	ReadLayer::~ReadLayer()
	{}

	void ReadLayer::compute_loglikelihoods(Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	ThreadPool* threads) const
	{ // dimension checks
	this->check_loglikelihood_dim(loglikelihood) ;
	this->check_loglikelihood_max_dim(loglikelihood_max) ;

	// don't parallelize
	if(threads == nullptr)
	{ std::promise<bool> promise ;
	std::future<bool> future = promise.get_future() ;
	this->compute_loglikelihoods_routine(0,
	this->n_row,
	std::ref(loglikelihood),
	std::ref(loglikelihood_max),
	promise) ;
	future.get() ;
	}
	// parallelize
	else
	{ size_t n_threads = threads->getNThread() ;
	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row, n_threads) ;

	// get promises and futures
	// the function run by the threads will simply fill the promise with
	// "true" to indicate that they are done
	std::vector<std::promise<bool>> promises(n_threads) ;
	std::vector<std::future<bool>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }
	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	threads->addJob(std::move(
	std::bind(&ReadLayer::compute_loglikelihoods_routine,
	this,
	slice.first,
	slice.second,
	std::ref(loglikelihood),
	std::ref(loglikelihood_max),
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	for(auto& future : futures)
	{ future.get() ; }
	// -------------------------- threads stop ---------------------------
	}
	}


	void ReadLayer::compute_loglikelihoods_routine(size_t from,
	size_t to,
	Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	std::promise<bool>& done) const
	{
	// normalize the models
	Matrix3D<double> model_norm = this->model ;
	for(size_t i=0; i<this->n_class; i++)
	{ double mean = 0. ;
	for(size_t j=0; j<this->l_model; j++)
	{ mean += model_norm(i,j,0) ; }
	mean /= this->l_model ;
	for(size_t j=0; j<this->l_model; j++)
	{ model_norm(i,j,0) /= mean ; }
	}

	// compute log likelihood
	for(size_t i=from; i<to; i++)
	{
	// set max to min possible value
	loglikelihood_max[i] = std::numeric_limits<double>::lowest() ;

	for(size_t j=0; j<this->n_class; j++)
	{ for(size_t s_fw=0, s_rev=this->n_shift-1;
	s_fw<this->n_shift; s_fw++, s_rev--)
	{ // slice is [from_fw,to)
	// from_dat_fw to_dat_fw [from_dat_fw, to_dat_fw]
	// fw \|---------->>>----------\|
	// ----------------------------------> data
	// rev \|----------<<<----------\| [from_dat_rev, to_dat_rev]
	// to_dat_rev can be -1 -> int
	// to_dat_rev from_dat_rev

	// log likelihood
	double ll_fw = 0. ;
	double ll_rev = 0. ;
	// --------------- forward ---------------
	size_t from_dat_fw = s_fw ;
	size_t to_dat_fw = from_dat_fw + this->l_model - 1 ;
	// --------------- reverse ---------------
	size_t from_dat_rev = this->n_col - 1 - s_fw ;
	// size_t to_dat_rev = from_dat_rev - (this->l_model - 1) ;

	for(size_t j_dat_fw=from_dat_fw,j_ref_fw=0, j_dat_rev=from_dat_rev;
	j_dat_fw<to_dat_fw;
	j_dat_fw++, j_ref_fw++, j_dat_rev--)
	{
	double ll ;
	// --------------- forward ---------------
	ll = log(poisson_pmf(this->data(i,j_dat_fw),
	model_norm(j,j_ref_fw,0)*
	this->window_means(i,s_fw))) ;
	- // ll_fw += ll ;
	// p(A\|B) may be really unlikely -> rounded to 0 -> log(0) = -inf
	- ll_fw += std::max(ll, ReadLayer::p_min_log) ;
	+ // prevent this by applying this
	+ if(isinf(ll))
	+ { ll = ReadLayer::p_min_log ; }
	+ ll_fw += ll ;
	// --------------- reverse ---------------
	if(this->flip)
	{ ll = log(poisson_pmf(this->data(i,j_dat_rev),
	model_norm(j,j_ref_fw,0)*
	this->window_means(i,s_rev))) ;
	- // ll_rev += ll ;
	// p(A\|B) may be really unlikely -> rounded to 0 -> log(0) = -inf
	- ll_rev += std::max(ll, ReadLayer::p_min_log) ;
	+ // prevent this by applying this
	+ if(isinf(ll))
	+ { ll = ReadLayer::p_min_log ; }
	+ ll_rev += ll ;
	}
	}
	loglikelihood(i,j,from_dat_fw,flip_states::FORWARD) = ll_fw ;
	// keep track of the max per row
	if(ll_fw > loglikelihood_max[i])
	{ loglikelihood_max[i] = ll_fw ; }

	if(this->flip)
	{ loglikelihood(i,j,from_dat_fw,flip_states::REVERSE) = ll_rev ;
	// keep track of the max per row
	if(ll_rev > loglikelihood_max[i])
	{ loglikelihood_max[i] = ll_rev ; }
	}
	}
	}
	}
	done.set_value(true) ;
	}


	void ReadLayer::update_model(const Matrix4D<double>& posterior_prob,
	ThreadPool* threads)
	{
	// computing sum over the columns (classes)
	size_t n_row = posterior_prob.get_dim()[0] ;
	size_t n_class = posterior_prob.get_dim()[1] ;
	size_t n_shift = posterior_prob.get_dim()[2] ;
	size_t n_flip = posterior_prob.get_dim()[3] ;
	vector_d colsum(n_class, 0.) ;
	for(size_t i=0; i<n_row; i++)
	{ for(size_t j=0; j<n_class; j++)
	{ for(size_t k=0; k<n_shift; k++)
	{ for(size_t l=0; l<n_flip; l++)
	{ colsum[j] += posterior_prob(i,j,k,l) ; }
	}
	}
	}
	this->update_model(posterior_prob,
	colsum,
	threads) ;
	}

	void ReadLayer::update_model(const Matrix4D<double>& posterior_prob,
	const vector_d& posterior_prob_colsum,
	ThreadPool* threads)
	{
	// don't parallelize
	if(threads == nullptr)
	{ std::promise<Matrix3D<double>> promise ;
	std::future<Matrix3D<double>> future = promise.get_future() ;
	this->update_model_routine(0,
	this->n_row,
	posterior_prob,
	posterior_prob_colsum,
	promise) ;
	// this->model = future.get() ;
	auto model = future.get() ;
	size_t n_class_to_update = this->n_class - this->bckg_class ;
	for(size_t i=0; i<n_class_to_update; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) = model(i,j,k) ; }
	}
	}
	}
	// parallelize
	else
	{ size_t n_threads = threads->getNThread() ;
	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row, n_threads) ;

	// get promises and futures
	// the function run by the threads will simply fill the promise with
	// "true" to indicate that they are done
	std::vector<std::promise<Matrix3D<double>>> promises(n_threads) ;
	std::vector<std::future<Matrix3D<double>>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }
	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	threads->addJob(std::move(
	std::bind(&ReadLayer::update_model_routine,
	this,
	slice.first,
	slice.second,
	std::ref(posterior_prob),
	std::ref(posterior_prob_colsum),
	std::ref(promises[i])))) ;
	}
	// reinitialise the model
	/*
	this->model = Matrix3D<double>(this->n_class,
	this->l_model,
	this->n_category,
	0.) ;
	*/
	size_t n_class_to_update = this->n_class - this->bckg_class ;
	for(size_t i=0; i<n_class_to_update; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) = 0. ; }
	}
	}
	// wait until all threads are done working
	// and update the mode
	for(auto& future : futures)
	{ Matrix3D<double> model_part = future.get() ;
	// for(size_t i=0; i<this->n_class; i++)
	for(size_t i=0; i<n_class_to_update; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) +=
	model_part(i,j,k) ;
	}
	}
	}
	}
	// -------------------------- threads stop ---------------------------
	}
	// avoid 0's in the model to ensure that pmf_poisson() never
	// return 0
	for(size_t i=0; i<this->n_class; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) =
	std::max(this->model(i,j,k), ReadLayer::p_min) ;
	}
	}
	}
	}

	void ReadLayer::update_model_routine(size_t from,
	size_t to,
	const Matrix4D<double>& posterior_prob,
	const vector_d& posterior_prob_colsum,
	std::promise<Matrix3D<double>>& promise) const
	{
	// dimension checks
	this->check_posterior_prob_dim(posterior_prob) ;
	this->check_posterior_prob_colsum_dim(posterior_prob_colsum) ;

	// partial model
	Matrix3D<double> model(this->n_class,
	this->l_model,
	this->n_category,
	0.) ;

	size_t n_class_to_update = this->n_class - this->bckg_class ;

	for(size_t n_class=0; n_class < n_class_to_update; n_class++)
	{ for(size_t i=from; i<to; i++)
	{ for(size_t n_shift=0; n_shift<this->n_shift; n_shift++)
	{ // --------------- forward ---------------
	int from_dat_fw = n_shift ;
	int to_dat_fw = from_dat_fw + this->l_model - 1 ;
	for(int j_dat_fw=from_dat_fw, j_ref_fw=0;
	j_dat_fw<=to_dat_fw; j_dat_fw++, j_ref_fw++)
	{ model(n_class,j_ref_fw,0) +=
	(posterior_prob(i,n_class,n_shift,flip_states::FORWARD) *
	this->data(i,j_dat_fw)) /
	posterior_prob_colsum[n_class] ;
	}
	// --------------- reverse ---------------
	if(this->flip)
	{ int from_dat_rev = this->n_col - 1 - n_shift ;
	int to_dat_rev = from_dat_rev - (this->l_model - 1) ;
	for(int j_dat_rev=from_dat_rev, j_ref_fw=0;
	j_dat_rev >= to_dat_rev; j_dat_rev--, j_ref_fw++)
	{ model(n_class,j_ref_fw,0) +=
	(posterior_prob(i,n_class,n_shift,flip_states::REVERSE) *
	this->data(i,j_dat_rev)) /
	posterior_prob_colsum[n_class] ;
	}
	}
	}
	}
	}
	promise.set_value(model) ;
	}

	void ReadLayer::compute_window_means(ThreadPool* threads)
	{ // don't parallelize
	if(threads == nullptr)
	{ std::promise<bool> promise ;
	std::future<bool> future = promise.get_future() ;
	this->compute_window_means_routine(0, this->n_row, promise) ;
	future.get() ;
	}
	// parallelize
	else
	{ size_t n_threads = threads->getNThread() ;
	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row, n_threads) ;

	// get promises and futures
	// the function run by the threads will simply fill the promise with
	// "true" to indicate that they are done
	std::vector<std::promise<bool>> promises(n_threads) ;
	std::vector<std::future<bool>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }
	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	threads->addJob(std::move(
	std::bind(&ReadLayer::compute_window_means_routine,
	this,
	slice.first,
	slice.second,
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	for(auto& future : futures)
	{ future.get() ; }
	// -------------------------- threads stop ---------------------------
	}
	}

	void ReadLayer::compute_window_means_routine(size_t from,
	size_t to,
	std::promise<bool>& done)
	{ double l_window = double(this->l_model) ;
	for(size_t i=from; i<to; i++)
	{ for(size_t from=0; from<this->n_shift; from++)
	{ double sum = 0. ;
	// slice is [from,to)
	size_t to = from + this->l_model ;
	for(size_t j=from; j<to; j++)
	{ sum += this->data(i,j) ;}
	this->window_means(i,from) = sum / l_window ;
	}
	}
	done.set_value(true) ;
	}

	void ReadLayer::check_posterior_prob_colsum_dim(const vector_d& posterior_prob_colsum) const
	{ if(posterior_prob_colsum.size() != this->n_class)
	{ char msg[4096] ;
	sprintf(msg,
	"Error! posterior_class_prob matrix size is not "
	"equal to model class number : %zu / %zu",
	posterior_prob_colsum.size(), this->n_class) ;
	throw std::invalid_argument(msg) ;
	}
	}

	void ReadLayer::create_bckg_class()
	{
	// mean count
	double mean = 0. ;
	double n = (double)this->data.get_nrow() *
	(double)this->data.get_ncol() ;
	for(size_t i=0; i<this->data.get_nrow(); i++)
	{ for(size_t j=0; j<this->data.get_ncol(); j++)
	{ mean += ((double)this->data(i,j)) / n ; }
	}
	// set last class as background class
	for(size_t j=0; j<this->l_model; j++)
	{ this->model(this->n_class-1,j,0) = mean ; }
	}

	diff --git a/src/Clustering/ReadLayer.hpp b/src/Clustering/ReadLayer.hpp
	index 5d52c92..cd001e4 100644
	--- a/src/Clustering/ReadLayer.hpp
	+++ b/src/Clustering/ReadLayer.hpp
	@@ -1,269 +1,278 @@
	#ifndef READLAYER_HPP
	#define READLAYER_HPP

	#include <Data2DLayer.hpp>

	#include <future> // std::promise

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <ThreadPool.hpp>

	typedef std::vector<double> vector_d ;

	class ReadLayer : public Data2DLayer
	{
	public:
	/*!
	* \brief Constructs an object with the
	* given data and an empty (0 values)
	* model.
	* \param data the data.
	* \param n_class the number of classes
	* of the model.
	* \param n_shift the number of shift
	* states of the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	* \param threads a pointer to a thread pool to
	* parallelize the computations. If nullptr is given,
	* the computations are performed by the main thread.
	*/
	ReadLayer(const Matrix2D<int>& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	ThreadPool* threads = nullptr) ;

	/*!
	* \brief Constructs an object with the
	* given data and an empty (0 values)
	* model.
	* \param data the data.
	* \param n_class the number of classes
	* of the model.
	* \param n_shift the number of shift
	* states of the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	* \param threads a pointer to a thread pool to
	* parallelize the computations. If nullptr is given,
	* the computations are performed by the main thread.
	*/
	ReadLayer(Matrix2D<int>&& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	ThreadPool* threads = nullptr) ;

	/*!
	* \brief Construct an object with the
	* given data and model.
	* \param data the data.
	* \param the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	* \param threads a pointer to a thread pool to
	* parallelize the computations. If nullptr is given,
	* the computations are performed by the main thread.
	*/
	ReadLayer(const Matrix2D<int>& data,
	const Matrix3D<double>& model,
	bool flip,
	bool bckg_class,
	ThreadPool* threads = nullptr) ;

	/*!
	* \brief Construct an object with the
	* given data and model.
	* \param data the data.
	* \param the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	* \param threads a pointer to a thread pool to
	* parallelize the computations. If nullptr is given,
	* the computations are performed by the main thread.
	*/
	ReadLayer(Matrix2D<int>&& data,
	Matrix3D<double>&& model,
	bool flip,
	bool bckg_class,
	ThreadPool* threads = nullptr) ;

	/*!
	* Destructor
	*/
	virtual ~ReadLayer() override ;

	/*!
	* \brief Computes the log likelihood of the data
	* given the current model parameters.
	* During this process, a normalized version of the
	* models, having a sum of signal of 1 count in average,
	* is used (a copy of the models is normalized, meaning
	* that the original models can still be retrieved the
	* dedicated getter).
	* \param logliklihood a matrix to store the
	* results. It should have the following dimensions :
	* 1st : same as the data number of row
	* 2nd : same as the model number of classes
	* 3rd : same as the number of shifts
	* 4th : same as the number of flip states
	* \param loglikelihood_max a vector containing the
	* max value for each row of loglikelihood.
	* Its length should be equal to the data row number.
	* \param threads a pointer to a thread pool to
	* parallelize the computations. If nullptr is given,
	* the computations are performed by the main thread.
	* \throw std::invalid_argument if the dimensions are
	* incorrect.
	*/
	virtual void compute_loglikelihoods(Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	ThreadPool* threads=nullptr) const override ;

	/*!
	* \brief Updates the model given the posterior
	* probabilities (the probabilities of each row
	* in the data to be assigned to each class,
	* for each shift and flip state).
	- * \param posterior_prob the data assignment probabilities to
	- * the different classes.
	+ * 0 values inside the model are forbidden.
	+ * The lowest possible value inside the model
	+ * is ReadLayer::p_min.
	+ * \param posterior_prob the data assignment
	+ * probabilities to the different classes.
	* \param threads a pointer to a thread pool to
	* parallelize the computations. If nullptr is given,
	* the computations are performed by the main thread.
	*/
	virtual void update_model(const Matrix4D<double>& posterior_prob,
	ThreadPool* threads=nullptr) override ;

	/*!
	* \brief Updates the model given the posterior
	* probabilities (the probabilities of each row
	* in the data to be assigned to each class,
	* for each shift and flip state).
	* This method does the same as the virtual method it
	* overloads. The only difference is that, for run time
	* gain, it is given the sum over the columns of the
	* posterior_prob matrix which is computed by the virtual
	* method.
	+ * 0 values inside the model are forbidden.
	+ * The lowest possible value inside the model
	+ * is ReadLayer::p_min.
	* \param posterior_prob the data assignment probabilities to
	* the different classes.
	* \param posterior_prob_colsum the sum over the columns
	* (classes) of the posterior_prob matrix.
	* \param threads a pointer to a thread pool to
	* parallelize the computations. If nullptr is given,
	* the computations are performed by the main thread.
	*/
	void update_model(const Matrix4D<double>& posterior_prob,
	const vector_d& posterior_prob_colsum,
	ThreadPool* threads=nullptr) ;
	protected:
	/*!
	* \brief The routine that effectively performs the
	* loglikelihood computations.
	* \param from the index of the first row of the data
	* to considered.
	* \param to the index of the past last row of the data
	* to considered.
	* \param loglikelihood a matrix to store the
	* results. It should have the following dimensions :
	* 1st : same as the data number of row
	* 2nd : same as the model number of classes
	* 3rd : same as the number of shifts
	* 4th : same as the number of flip states
	* \param loglikelihood_max a vector containing the
	* max value for each row of log_likelihood.
	* Its length should be equal to the data row number.
	* \param done a promise to be filled when the routine
	* is done running.
	*/
	void compute_loglikelihoods_routine(size_t from,
	size_t to,
	Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	std::promise<bool>& done) const ;

	/*!
	* \brief The routine that effectively update the model.
	+ * 0 values inside the model are forbidden.
	+ * The lowest possible value inside the model
	+ * is ReadLayer::p_min.
	* \param from the index of the first row of the
	* posterior probabilities to considered.
	* \param to the index of the past last row of the
	* posterior probabilities to considered.
	* \param posterior_prob the data assignment probabilities
	* to the different classes.
	* \param
	* \param promise a promise containing the partial model
	* computed from the given data slice. If several routines
	* work together to update the model, the promise matrices
	* need to be summed up to get the final model.
	*/
	void update_model_routine(size_t from,
	size_t to,
	const Matrix4D<double>& posterior_prob,
	const vector_d& posterior_prob_colsum,
	std::promise<Matrix3D<double>>& promise) const ;

	/*!
	* \brief Computes the mean number of reads present in
	* each slice (of length l_model), in each row
	* of the data and store them in this->window_means.
	* \param threads a pointer to a thread pool to
	* parallelize the computations. If nullptr is given,
	* the computations are performed by the main thread.
	*/
	void compute_window_means(ThreadPool* threads) ;

	/*!
	* \brief The routine that effectively computes the
	* window means.
	* \param from the index of the first row of the
	* data to considered.
	* \param to the index of the past last row of the
	* data to considered.
	* \param done a promise to fill when the routine
	* is done running.
	*/
	void compute_window_means_routine(size_t from,
	size_t to,
	std::promise<bool>& done) ;

	/*!
	* \brief Checks that the argument has compatible
	* dimensions with the data and models. If this is
	* not the case, throw a std::invalid_argument with
	* a relevant message.
	* \param posterior_class_prob a vector containing the
	* class probabilities.
	* It should have a length equal to the number of
	* classes.
	* \throw std::invalid_argument if the dimensions are
	* incorrect.
	*/
	void check_posterior_prob_colsum_dim(const vector_d& posterior_prob_colsum) const ;

	/*!
	* \brief Sets the last class as background class with the mean
	* number of counts in the data at each position.
	*/
	void create_bckg_class() ;

	/*!
	* \brief contains the data means, for
	* each window of size l_model.
	*/
	Matrix2D<double> window_means ;

	} ;

	#endif // READLAYER_HPP
	diff --git a/src/Clustering/SequenceLayer.cpp b/src/Clustering/SequenceLayer.cpp
	index 8906c96..d14859f 100644
	--- a/src/Clustering/SequenceLayer.cpp
	+++ b/src/Clustering/SequenceLayer.cpp
	@@ -1,432 +1,432 @@
	#include <SequenceLayer.hpp>

	#include <stdexcept> // std::invalid_argument
	#include <limits> // numeric_limits
	#include <cmath> // log(), pow()
	#include <vector>
	#include <algorithm> // std::max_element()
	#include <future> // std::promise, std::future

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <dna_utility.hpp> // dna::base_composition()


	double SequenceLayer::score_subseq(const Matrix2D<int>& seq,
	size_t row,
	size_t start,
	const Matrix2D<double>& model_log)
	{
	if(start > seq.get_ncol() - model_log.get_nrow())
	{ char msg[4096] ;
	sprintf(msg, "Error! given start (%zu) is too high. Max value is %zu",
	start, seq.get_ncol() - model_log.get_nrow()) ;
	throw std::invalid_argument(msg) ;
	}
	else if(model_log.get_nrow() > seq.get_ncol())
	{ char msg[4096] ;
	sprintf(msg, "Error! given model is longer than sequences (%zu / %zu)",
	model_log.get_nrow(), seq.get_ncol()) ;
	throw std::invalid_argument(msg) ;
	}
	else if(model_log.get_ncol() != 4)
	{ char msg[4096] ;
	sprintf(msg, "Error! given model 2nd dimension is not 4 (%zu)",
	model_log.get_ncol()) ;
	throw std::invalid_argument(msg) ;
	}

	size_t from = start ;
	size_t to = from + model_log.get_nrow() ; // will score [from, to)

	int n_code = dna::char_to_int('N') ;
	double ll = 0 ;
	for(size_t i=from, j=0; i<to; i++, j++)
	{ int base = seq(row,i) ;
	// N char -> get max score
	if(base == n_code)
	{ std::vector<double> row = model_log.get_row(j) ;
	ll += *(std::max_element(std::begin(row),
	std::end(row))) ;
	}
	// A,C,G,T -> get its score
	else
	{ ll += model_log(j,base) ; }
	}
	return ll ;
	}

	SequenceLayer::SequenceLayer(const Matrix2D<int>& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class)
	: Data2DLayer(data, n_class, n_shift, flip,bckg_class)
	{
	this->n_category = 4 ;

	// initialise the empty model
	this->model = Matrix3D<double>(this->n_class,
	this->l_model,
	this->n_category,
	0.) ;
	// background class
	if(this->bckg_class)
	{ this->create_bckg_class() ; }
	}

	SequenceLayer::SequenceLayer(Matrix2D<int>&& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class)
	: Data2DLayer(std::move(data), n_class, n_shift, flip, bckg_class)
	{
	this->n_category = 4 ;

	// initialise the empty model
	this->model = Matrix3D<double>(this->n_class,
	this->l_model,
	this->n_category,
	0.) ;

	// background class
	if(this->bckg_class)
	{ this->create_bckg_class() ; }
	}

	SequenceLayer::SequenceLayer(const Matrix2D<int>& data,
	const Matrix3D<double>& model,
	bool flip,
	bool bckg_class)
	: Data2DLayer(data, model,flip, bckg_class)
	{}

	SequenceLayer::SequenceLayer(Matrix2D<int>&& data,
	Matrix3D<double>&& model,
	bool flip,
	bool bckg_class)
	: Data2DLayer(std::move(data), std::move(model),flip, bckg_class)
	{}

	SequenceLayer::~SequenceLayer()
	{ ; }

	void SequenceLayer::compute_loglikelihoods(Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	ThreadPool* threads) const
	{
	// dimension checks
	this->check_loglikelihood_dim(loglikelihood) ;
	this->check_loglikelihood_max_dim(loglikelihood_max) ;

	// compute the log prob model and the log prob reverse-complement model
	std::vector<Matrix2D<double>> model_log(this->n_class,
	Matrix2D<double>(this->l_model,
	this->n_category,
	0.)) ;
	std::vector<Matrix2D<double>> model_log_rev = model_log ;
	for(size_t i=0; i<this->n_class; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ // forward
	model_log[i](j,k) = log(this->model(i,j,k)) ;
	// reverse
	model_log_rev[i](this->l_model-j-1,this->n_category-k-1)
	= log(this->model(i,j,k)) ;
	}
	}
	}

	// don't parallelize
	if(threads == nullptr)
	{ std::promise<bool> promise ;
	std::future<bool> future = promise.get_future() ;
	this->compute_loglikelihoods_routine(0, this->n_row,
	loglikelihood,
	loglikelihood_max,
	model_log,
	model_log_rev,
	promise) ;
	future.get() ;
	}
	// parallelize
	else
	{ size_t n_threads = threads->getNThread() ;
	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row, n_threads) ;

	// get promises and futures
	// the function run by the threads will simply fill the promise with
	// "true" to indicate that they are done
	std::vector<std::promise<bool>> promises(n_threads) ;
	std::vector<std::future<bool>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }
	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	threads->addJob(std::move(
	std::bind(&SequenceLayer::compute_loglikelihoods_routine,
	this,
	slice.first,
	slice.second,
	std::ref(loglikelihood),
	std::ref(loglikelihood_max),
	std::ref(model_log),
	std::ref(model_log_rev),
	std::ref(promises[i])))) ;
	}
	// wait until all threads are done working
	for(auto& future : futures)
	{ future.get() ; }
	// -------------------------- threads stop ---------------------------
	}
	}

	void SequenceLayer::compute_loglikelihoods_routine(size_t from,
	size_t to,
	Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	const std::vector<Matrix2D<double>>& model_log,
	const std::vector<Matrix2D<double>>& model_log_rev,
	std::promise<bool>& done) const
	{
	// compute log likelihood
	for(size_t i=from; i<to; i++)
	{
	// set max to min possible value
	loglikelihood_max[i] = std::numeric_limits<double>::lowest() ;

	for(size_t j=0; j<this->n_class; j++)
	- { for(size_t s=0; s<this->n_shift; s++)
	+ {
	+ for(size_t s=0; s<this->n_shift; s++)
	{ // forward strand
	- { double ll_fw = std::max(score_subseq(this->data, i, s, model_log[j]),
	- SequenceLayer::p_min_log) ;
	+ { double ll_fw = score_subseq(this->data, i, s, model_log[j]) ;
	// loglikelihood(i,j,s,flip_states::FORWARD) = ll_fw ;
	// apply lower bound here -> log(1e-100)
	loglikelihood(i,j,s,flip_states::FORWARD) = ll_fw ;
	// keep track of max per row
	if(ll_fw > loglikelihood_max[i])
	{ loglikelihood_max[i] = ll_fw ; }

	}
	// reverse
	if(this->flip)
	- { double ll_rev = std::max(score_subseq(this->data, i, s, model_log_rev[j]),
	- SequenceLayer::p_min_log) ;
	+ { double ll_rev = score_subseq(this->data, i, s, model_log_rev[j]) ;
	// loglikelihood(i,j,s,flip_states::REVERSE) = ll_rev ;
	// apply lower bound here -> log(1e-100)
	loglikelihood(i,j,s,flip_states::REVERSE) = ll_rev ;
	// keep track of max per row
	if(ll_rev > loglikelihood_max[i])
	{ loglikelihood_max[i] = ll_rev ; }
	}
	}
	}
	}
	done.set_value(true) ;
	}

	+
	void SequenceLayer::update_model(const Matrix4D<double>& posterior_prob,
	ThreadPool* threads)
	{ // don't parallelize
	if(threads == nullptr)
	{ std::promise<Matrix3D<double>> promise ;
	std::future<Matrix3D<double>> future = promise.get_future() ;
	this->update_model_routine(0,
	this->n_row,
	posterior_prob,
	promise) ;
	// this->model = future.get() ;
	auto model = future.get() ;
	size_t n_class_to_update = this->n_class - this->bckg_class ;
	for(size_t i=0; i<n_class_to_update; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) = model(i,j,k) ; }
	}
	}
	}
	// parallelize
	else
	{ size_t n_threads = threads->getNThread() ;
	// compute the slices on which each thread will work
	std::vector<std::pair<size_t,size_t>> slices =
	ThreadPool::split_range(0, this->n_row, n_threads) ;

	// get promises and futures
	// the function run by the threads will simply fill the promise with
	// "true" to indicate that they are done
	std::vector<std::promise<Matrix3D<double>>> promises(n_threads) ;
	std::vector<std::future<Matrix3D<double>>> futures(n_threads) ;
	for(size_t i=0; i<n_threads; i++)
	{ futures[i] = promises[i].get_future() ; }
	// distribute work to threads
	// -------------------------- threads start --------------------------
	for(size_t i=0; i<n_threads; i++)
	{ auto slice = slices[i] ;
	threads->addJob(std::move(
	std::bind(&SequenceLayer::update_model_routine,
	this,
	slice.first,
	slice.second,
	std::ref(posterior_prob),
	std::ref(promises[i])))) ;
	}
	// reinitialise the model
	size_t n_class_to_update = this->n_class - this->bckg_class ;
	for(size_t i=0; i<n_class_to_update; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) = 0. ; }
	}
	}
	// wait until all threads are done working
	// and update the model
	for(auto& future : futures)
	{ Matrix3D<double> model_part = future.get() ;
	// for(size_t i=0; i<this->n_class; i++)
	for(size_t i=0; i<n_class_to_update; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) += model_part(i,j,k) ; }
	}
	}
	}
	// -------------------------- threads stop ---------------------------
	}
	// make sure to have no 0 values
	for(size_t i=0; i<this->n_class; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ for(size_t k=0; k<this->n_category; k++)
	{ this->model(i,j,k) =
	std::max(this->model(i,j,k), SequenceLayer::p_min) ;
	}
	}
	}
	// normalize to get probs
	for(size_t i=0; i<this->n_class; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ double sum = 0. ;
	for(size_t k=0; k<this->n_category; k++)
	{ sum += this->model(i,j,k) ; }
	for(size_t k=0; k<this->n_category; k++)
	{ double p = this->model(i,j,k) / sum ;
	this->model(i,j,k) = p ;
	}
	}
	}
	}


	void SequenceLayer::update_model_routine(size_t from,
	size_t to,
	const Matrix4D<double>& posterior_prob,
	std::promise<Matrix3D<double>>& promise) const
	{ // dimension checks
	this->check_posterior_prob_dim(posterior_prob) ;

	Matrix3D<double> model(this->n_class,
	this->l_model,
	this->n_category,
	0.) ;

	// the int code of A, C, G, T, N
	static int a_code = dna::char_to_int('A') ;
	static int c_code = dna::char_to_int('C') ;
	static int g_code = dna::char_to_int('G') ;
	static int t_code = dna::char_to_int('T') ;
	static int n_code = dna::char_to_int('N') ;
	// the int code of the reverse complement of A, C, G, T
	static int a_code_r = dna::char_to_int('A', true) ;
	static int c_code_r = dna::char_to_int('C', true) ;
	static int g_code_r = dna::char_to_int('G', true) ;
	static int t_code_r = dna::char_to_int('T', true) ;

	size_t n_class_to_update = this->n_class - this->bckg_class ;

	for(size_t k=0; k<n_class_to_update; k++)
	{ for(size_t s=0; s<this->n_shift; s++)
	{ // for(size_t j=0; j<this->l_model; j++)
	for(size_t j=0, j_rv=this->l_model-1; j<this->l_model; j++, j_rv--)
	{ // base prob on fw and rv strand
	vector_d base_prob_fw(this->n_category, 0.) ;
	vector_d base_prob_rv(this->n_category, 0.) ;
	for(size_t i=from; i<to; i++)
	{ int base = this->data(i,s+j) ;
	int base_rv = this->n_category - base - 1 ; // complement
	// int base_rv = this->n_category - this->data(i,s+j_rv) - 1 ;
	// std::cerr << k << " "
	// << s << " "
	// << j << "/" << j_rv << " "
	// << base << "/" << base_rv
	// << std::endl ;
	// N
	if(base == n_code)
	{ // --------------- forward ---------------
	{ base_prob_fw[a_code] +=
	posterior_prob(i,k,s,SequenceLayer::FORWARD) ;
	base_prob_fw[c_code] +=
	posterior_prob(i,k,s,SequenceLayer::FORWARD) ;
	base_prob_fw[g_code] +=
	posterior_prob(i,k,s,SequenceLayer::FORWARD) ;
	base_prob_fw[t_code] +=
	posterior_prob(i,k,s,SequenceLayer::FORWARD) ;
	}
	// --------------- reverse ---------------
	if(this->flip)
	{ base_prob_rv[a_code_r] +=
	posterior_prob(i,k,s,SequenceLayer::REVERSE) ;
	base_prob_rv[c_code_r] +=
	posterior_prob(i,k,s,SequenceLayer::REVERSE) ;
	base_prob_rv[g_code_r] +=
	posterior_prob(i,k,s,SequenceLayer::REVERSE) ;
	base_prob_rv[t_code_r] +=
	posterior_prob(i,k,s,SequenceLayer::REVERSE) ;
	}
	}
	// A, C, G, T
	else
	{ // --------------- forward ---------------
	{ base_prob_fw[base] +=
	posterior_prob(i,k,s,SequenceLayer::FORWARD) ;
	}
	// --------------- reverse ---------------
	if(this->flip)
	{ base_prob_rv[base_rv] +=
	posterior_prob(i,k,s,SequenceLayer::REVERSE) ;
	}
	}
	}
	// update this position of the model
	for(size_t i=0; i<base_prob_fw.size(); i++)
	{ // --------------- forward ---------------
	{ model(k,j,i) += base_prob_fw[i] ; }
	// --------------- reverse ---------------
	if(this->flip)
	{ model(k,this->l_model-j-1,i) += base_prob_rv[i] ;
	/* model(k,j_rv,i) += base_prob_rv[i] ; */

	}
	}
	}
	}
	}
	promise.set_value(model) ;
	}

	void SequenceLayer::create_bckg_class()
	{ // sequence composition
	std::vector<double> base_comp =
	dna::base_composition(this->data,
	this->flip) ;
	// set last class as background class
	for(size_t i=0; i<this->n_category; i++)
	{ for(size_t j=0; j<this->l_model; j++)
	{ this->model(this->n_class-1,j,i) = base_comp[i] ; }
	}
	}
	diff --git a/src/Clustering/SequenceLayer.hpp b/src/Clustering/SequenceLayer.hpp
	index 2238ff4..4cd5bf8 100644
	--- a/src/Clustering/SequenceLayer.hpp
	+++ b/src/Clustering/SequenceLayer.hpp
	@@ -1,231 +1,237 @@
	#ifndef SEQUENCELAYER_HPP
	#define SEQUENCELAYER_HPP

	#include <Data2DLayer.hpp>

	#include <iostream>
	#include <string>
	#include <future> // std::promise
	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <ThreadPool.hpp>

	typedef std::vector<double> vector_d ;

	class SequenceLayer : public Data2DLayer
	{
	public:
	/*!
	* \brief Computes the log-likelihood of the sub-
	* sequence - stored in a given row - and starting
	* at the offset <col> in the given sequence matrix.
	* The subsequence length is determined by the model
	* lenght.
	* \param seq the sequences in integer format.
	* \param row the row containing the sequence of
	* interest.
	* \param col the index at which the sub-sequence
	* is starting (1st index inside the subsequence
	* of interest).
	* \param model_log a model containing the log
	* probability model.
	* \return the log-likelihood of the sub-sequence
	* given the model.
	* \throw std::invalid_argument if 1) the offset is
	* invalid, 2) the sequence and the model have
	* incompatible dimensions or 3) the model 2n dimension
	* is not 4 (A,C,G,T).
	*/
	static double score_subseq(const Matrix2D<int>& seq,
	size_t row,
	size_t col,
	const Matrix2D<double>& model_log) ;

	public:
	/*!
	* \brief Constructs an object with the
	* given data and an empty (0 values)
	* model.
	* \param data the data.
	* \param n_class the number of classes
	* of the model.
	* \param n_shift the number of shift
	* states of the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	SequenceLayer(const Matrix2D<int>& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class) ;

	/*!
	* \brief Constructs an object with the
	* given data and an empty (0 values)
	* model.
	* \param data the data.
	* \param n_class the number of classes
	* of the model.
	* \param n_shift the number of shift
	* states of the model.
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	SequenceLayer(Matrix2D<int>&& data,
	size_t n_class,
	size_t n_shift,
	bool flip,
	bool bckg_class) ;

	/*!
	* \brief Construct an object with the
	* given data and model.
	* The shifting freedom is set to (data number
	* of columns) - (the model 2nd dimension)
	* + 1.
	* \param data the data. The sequences
	* should be stored as integer values :
	* A:0, C:1, G:2, T:3, else:5.
	* \param model the model with the following
	* dimensions :
	* dim1 the number of classes
	* dim2 the model length
	* dim3 4 (A,C,G,T)
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	SequenceLayer(const Matrix2D<int>& data,
	const Matrix3D<double>& model,
	bool flip,
	bool bckg_class) ;


	/*!
	* \brief Construct an object with the
	* given data and model.
	* The shifting freedom is set to (data number
	* of columns) - (the model 2nd dimension)
	* + 1.
	* \param data the data. The sequences
	* should be stored as integer values :
	* A:0, C:1, G:2, T:3, else:5.
	* \param model the model with the following
	* dimensions :
	* dim1 the number of classes
	* dim2 the model length
	* dim3 4 (A,C,G,T)
	* \param flip whether flipping is allowed.
	* \param bckg_class should the last class
	* of the model is constant and models the
	* background.
	*/
	SequenceLayer(Matrix2D<int>&& data,
	Matrix3D<double>&& model,
	bool flip,
	bool bckg_class) ;

	/*!
	* Destructor
	*/
	virtual ~SequenceLayer() override ;

	/*!
	* \brief Computes the log likelihood of the data
	* given the current model parameters.
	* \param logliklihood a matrix to store the
	* results. It should have the following dimensions :
	* 1st : same as the data number of row
	* 2nd : same as the model number of classes
	* 3rd : same as the number of shifts
	* 4th : same as the number of flip states
	* \param loglikelihood_max a vector containing the
	* max value for each row of loglikelihood.
	* Its length should be equal to the data row number.
	* \throw std::invalid_argument if the dimensions are
	* incorrect.
	*/
	virtual void compute_loglikelihoods(Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	ThreadPool* threads=nullptr) const override ;

	/*!
	* \brief Updates the model given the posterior
	* probabilities (the probabilities of each row
	* in the data to be assigned to each class,
	* for each shift and flip state).
	+ * 0 values inside the model are forbidden.
	+ * The lowest possible value inside the model
	+ * is SequenceLayer::p_min.
	* \param posterior_prob the data assignment probabilities to
	* the different classes.
	*/
	virtual void update_model(const Matrix4D<double>& posterior_prob,
	ThreadPool* threads=nullptr) override ;

	protected:
	/*!
	* \brief The routine that effectively performs the
	* loglikelihood computations.
	* \param from the index of the first row of the data
	* to considered.
	* \param to the index of the past last row of the data
	* to considered.
	* \param loglikelihood a matrix to store the
	* results. It should have the following dimensions :
	* 1st : same as the data number of row
	* 2nd : same as the model number of classes
	* 3rd : same as the number of shifts
	* 4th : same as the number of flip states
	* \param loglikelihood_max a vector containing the
	* max value for each row of log_likelihood.
	* Its length should be equal to the data row number.
	* \param model_log a vector containing the matrices with
	* the log values of the model for each class.
	* \param model_log_rev a vector containing the matrices with
	* the log values of the reverse strand model for each class
	* (the 1st position in the model becomes the last in the
	* reverse model and probabilities are swapped A<->T and C<->G).
	* \param done a promise to be filled when the routine
	* is done running.
	*/
	void compute_loglikelihoods_routine(size_t from,
	size_t to,
	Matrix4D<double>& loglikelihood,
	vector_d& loglikelihood_max,
	const std::vector<Matrix2D<double>>& model_log,
	const std::vector<Matrix2D<double>>& model_log_rev,
	std::promise<bool>& done) const ;

	/*!
	* \brief The routine that effectively update the model.
	+ * 0 values inside the model are forbidden.
	+ * The lowest possible value inside the model
	+ * is SequenceLayer::p_min.
	* \param from the index of the first row of the
	* posterior probabilities to considered.
	* \param to the index of the past last row of the
	* posterior probabilities to considered.
	* \param posterior_prob the data assignment probabilities
	* to the different classes.
	* \param
	* \param done a promise containing the partial model
	* computed from the given data slice. If several routines
	* work together at updating the model, they need to be
	* summed and normalized (by the column sum) to get the
	* final model.
	*/
	void update_model_routine(size_t from,
	size_t to,
	const Matrix4D<double>& posterior_prob,
	std::promise<Matrix3D<double>>& done) const ;

	/*!
	* \brief Sets the last class as background class with the
	* mean base probababilities of the data at each position.
	*/
	void create_bckg_class() ;

	} ;
	#endif // SEQUENCELAYER_HPP
	diff --git a/src/main_test.cpp b/src/main_test.cpp
	index 2256a7a..7e2a898 100644
	--- a/src/main_test.cpp
	+++ b/src/main_test.cpp
	@@ -1,457 +1,453 @@
	#include <iostream>
	#include <string>
	#include <vector>
	#include <utility>
	#include <cmath>

	#include <Matrix2D.hpp>
	#include <Matrix3D.hpp>
	#include <Matrix4D.hpp>
	#include <ConsensusSequenceModelComputer.hpp>
	#include <EMConsensusSequence.hpp>
	#include <SequenceModelComputer.hpp>
	#include <EMSequence.hpp>
	#include <dna_utility.hpp>


	template<class T>
	std::ostream& operator << (std::ostream& stream, const std::vector<T>& v)
	{ for(const auto& x : v)
	{ stream << x << " " ; }
	return stream ;
	}


	Matrix3D<double> compute_consensus_sequences(const Matrix2D<int>& seq,
	const Matrix4D<double>& prob,
	size_t class_k)
	{
	size_t n_seq = seq.get_nrow() ;
	size_t l_seq = seq.get_ncol() ;
	// size_t n_class = prob.get_dim()[1] ;
	size_t n_shift = prob.get_dim()[2] ;
	size_t n_flip = prob.get_dim()[3] ;
	bool flip = n_flip == 2 ;
	size_t l_model = l_seq - n_shift + 1 ;

	Matrix3D<double> cons_seq(n_seq, l_model, 4, 0.) ;

	// aggregate
	for(size_t i=0; i<n_seq; i++)
	{ std::cerr << "i " << i << std::endl ;
	for(size_t s=0; s<n_shift; s++)
	{ //std::cerr << "s " << s << std::endl ;
	double tot = 0. ;
	// aggregate
	for(size_t j=0; j<l_model; j++)
	{ // std::cerr << "j " << j << std::endl ;
	// std::cerr << "seq(" << i << "," << s+j << ")" << std::endl ;
	int base = seq(i,s+j) ;
	// N should participate to A,C,G,T equivalently -> same as doing nothing
	if(base == dna::char_to_int('N'))
	{ continue ; }
	int base_rv = 4 - base - 1 ;
	// --------- forward ---------
	// std::cerr << "cons_seq(" << i << "," << j << "," << base << ") += prob(" << i << "," << class_k << "," << 0 << std::endl ;
	cons_seq(i,j,base) += prob(i,class_k,s,0) ;
	tot += prob(i,class_k,s,0) ;

	// --------- reverse ---------
	if(flip)
	{ // std::cerr << "cons_seq(" << i << "," << j << "," << base_rv << ") += prob(" << i << "," << class_k << "," << 1 << std::endl ;
	cons_seq(i,j,base_rv) += prob(i,class_k,s,1) ;
	tot += prob(i,class_k,s,1) ;
	}
	}
	}
	}

	// normalize
	for(size_t i=0; i<n_seq; i++)
	{ for(size_t j=0; j<l_model; j++)
	{ double tot = 0. ;
	for(size_t k=0; k<4; k++)
	{ tot += cons_seq(i,j,k) ; }
	for(size_t k=0; k<4; k++)
	{ cons_seq(i,j,k) /= tot ; }
	}
	}

	return cons_seq ;
	}

	Matrix2D<int> char_to_int(const Matrix2D<char>& seq)
	{
	size_t n_row = seq.get_nrow() ;
	size_t n_col = seq.get_ncol() ;

	Matrix2D<int> iseq(n_row, n_col, 0.) ;

	for(size_t i=0; i<n_row; i++)
	{ for(size_t j=0; j<n_col; j++)
	{ iseq(i,j) = dna::char_to_int(seq(i,j)) ; }
	}
	return iseq ;
	}

	Matrix3D<double> sequence_to_consensus(const Matrix2D<int>& seq)
	{
	size_t n_row = seq.get_nrow() ;
	size_t n_col = seq.get_ncol() ;
	size_t n_dim3 = 4 ; // A, C, G, T

	int base_n = dna::char_to_int('N') ;

	Matrix3D<double> cseq(n_row, n_col, n_dim3, 1e-100) ;


	double prob = 1. - 3*(1e-100) ; // p(base) - p(!base)

	for(size_t i=0; i<n_row; i++)
	{ for(size_t j=0; j<n_col; j++)
	{ int base = seq(i,j) ;
	// N
	if(base == base_n)
	{ cseq(i,j,0) = 0.25 ;
	cseq(i,j,1) = 0.25 ;
	cseq(i,j,2) = 0.25 ;
	cseq(i,j,3) = 0.25 ;
	}
	// A C G T
	else
	{ cseq(i,j, base) = prob ; }
	}
	}
	return cseq ;
	}

	double score_consensus_subseq(const Matrix3D<double>& cons_seq,
	size_t row,
	size_t start,
	const Matrix2D<double>& model)
	{ size_t ncol = cons_seq.get_dim()[1] ;
	// size_t dim1 = cons_seq.get_dim()[0] ;
	// size_t dim2 = cons_seq.get_dim()[1] ;
	size_t dim3 = cons_seq.get_dim()[2] ;

	if(start > ncol - model.get_nrow())
	{ char msg[4096] ;
	sprintf(msg, "Error! given start (%zu) is too high. Max value is %zu",
	start, ncol - model.get_nrow()) ;
	throw std::invalid_argument(msg) ;
	}
	else if(model.get_nrow() > ncol)
	{ char msg[4096] ;
	sprintf(msg, "Error! given model is longer than sequences (%zu / %zu)",
	model.get_nrow(), ncol) ;
	throw std::invalid_argument(msg) ;
	}
	else if(model.get_ncol() != 4)
	{ char msg[4096] ;
	sprintf(msg, "Error! given model 2nd dimension is not 4 (%zu)",
	model.get_ncol()) ;
	throw std::invalid_argument(msg) ;
	}
	else if(dim3 != 4)
	{ char msg[4096] ;
	sprintf(msg, "Error! given data 3rd dimension is not 4 (%zu)",
	dim3) ;
	throw std::invalid_argument(msg) ;
	}

	/*

	double ll = 0 ;
	for(size_t j_seq=from, j_mod=0; j_seq<to; j_seq++, j_mod++)
	{ for(size_t k=0; k<dim3; k++)
	{ std::cerr << ll
	<< " *= "
	<< "("
	<< model(j_mod,k)
	<< " * "
	<< cons_seq(row,j_seq,k)
	<< ")"
	<< std::endl ;
	ll = model(j_mod,k) cons_seq(row,j_seq,k) ;
	}
	}*/

	size_t from = start ;
	size_t to = from + model.get_nrow() ; // will score [from, to)

	double ll = 0. ;

	for(size_t j_seq=start, j_mod=0; j_seq<to; j_seq++, j_mod++)
	{ double sum = 0. ;
	for(size_t k=0; k<dim3; k++)
	{ std::cerr << sum
	<< "+= ("
	<< cons_seq(row, j_seq, k)
	<< " * "
	<< model(j_mod, k)
	<< ")"
	<< std::endl ;
	sum += (cons_seq(row, j_seq, k) * model(j_mod, k)) ;
	}
	std::cerr << sum << std::endl ;
	std::cerr << "---------------" << std::endl ;
	ll += log(sum) ;
	}

	return ll ;
	}


	Matrix2D<double> test_consensus(const std::string& file,
	- size_t n_class,
	- size_t n_iter,
	- size_t n_shift,
	- bool flip,
	- bool bckg_class,
	- const std::string& seed,
	- size_t n_thread)
	+ size_t n_class,
	+ size_t n_iter,
	+ size_t n_shift,
	+ bool flip,
	+ bool bckg_class,
	+ const std::string& seed,
	+ size_t n_thread)
	{ size_t n_flip = flip + 1 ;

	Matrix2D<int> seq(file) ;
	// Matrix2D<int> seq(char_to_int(Matrix2D<char>(file))) ;
	Matrix3D<double> cseq = sequence_to_consensus(seq) ;

	// partition
	EMConsensusSequence em(cseq,
	n_class,
	n_iter,
	n_shift,
	flip,
	bckg_class,
	seed,
	n_thread) ;
	em.classify() ;

	// get post prob
	Matrix4D<double> prob = em.get_post_prob() ;

	- std::cerr << prob << std::endl ;
	-
	// compute models
	ConsensusSequenceModelComputer mc(std::move(cseq),
	prob,
	bckg_class,
	n_thread) ;

	Matrix2D<double> model = mc.get_model() ;

	// compute the class prob
	size_t n_row = prob.get_dim()[0] ;
	std::vector<double> class_prob(n_class, 0.) ;
	double p_tot = 0. ;
	for(size_t i=0; i<n_row; i++)
	{ for(size_t j=0; j<n_class; j++)
	{ for(size_t k=0; k<n_shift; k++)
	{ for(size_t l=0; l<n_flip; l++)
	{ class_prob[j] += prob(i,j,k,l) ;
	p_tot += prob(i,j,k,l) ;
	}
	}
	}
	}
	for(auto& prob : class_prob)
	{ prob /= p_tot ; }
	// create a matrix containing the class prob in the 1st
	// column and the model in the remaining columns
	Matrix2D<double> model_final(model.get_nrow(),
	model.get_ncol() + 1) ;
	size_t i_class = 0 ;
	for(size_t i=0; i<model_final.get_nrow(); i++)
	{ if((i != 0) and (i % 4 == 0))
	{ i_class++ ; }
	model_final(i,0) = class_prob[i_class] ;
	}
	// fill the remaining with the model parameters
	for(size_t i=0; i<model.get_nrow(); i++)
	{ for(size_t j=0; j<model.get_ncol(); j++)
	{ model_final(i,j+1) = model(i,j) ; }
	}
	return model_final ;
	}

	Matrix2D<double> test_sequence(const std::string& file,
	size_t n_class,
	size_t n_iter,
	size_t n_shift,
	bool flip,
	bool bckg_class,
	const std::string& seed,
	size_t n_thread)
	{ size_t n_flip = flip + 1 ;

	- // Matrix2D<int> seq(file) ;
	- Matrix2D<int> seq(char_to_int(Matrix2D<char>(file))) ;
	+ Matrix2D<int> seq(file) ;
	+ // Matrix2D<int> seq(char_to_int(Matrix2D<char>(file))) ;

	// partition
	EMSequence em(seq,
	n_class,
	n_iter,
	n_shift,
	flip,
	bckg_class,
	seed,
	n_thread) ;
	em.classify() ;

	// get post prob
	Matrix4D<double> prob = em.get_post_prob() ;

	- std::cerr << prob << std::endl ;
	-
	// compute models
	SequenceModelComputer mc(std::move(seq),
	prob,
	bckg_class,
	n_thread) ;

	Matrix2D<double> model = mc.get_model() ;

	// compute the class prob
	size_t n_row = prob.get_dim()[0] ;
	std::vector<double> class_prob(n_class, 0.) ;
	double p_tot = 0. ;
	for(size_t i=0; i<n_row; i++)
	{ for(size_t j=0; j<n_class; j++)
	{ for(size_t k=0; k<n_shift; k++)
	{ for(size_t l=0; l<n_flip; l++)
	{ class_prob[j] += prob(i,j,k,l) ;
	p_tot += prob(i,j,k,l) ;
	}
	}
	}
	}
	for(auto& prob : class_prob)
	{ prob /= p_tot ; }
	// create a matrix containing the class prob in the 1st
	// column and the model in the remaining columns
	Matrix2D<double> model_final(model.get_nrow(),
	model.get_ncol() + 1) ;
	size_t i_class = 0 ;
	for(size_t i=0; i<model_final.get_nrow(); i++)
	{ if((i != 0) and (i % 4 == 0))
	{ i_class++ ; }
	model_final(i,0) = class_prob[i_class] ;
	}
	// fill the remaining with the model parameters
	for(size_t i=0; i<model.get_nrow(); i++)
	{ for(size_t j=0; j<model.get_ncol(); j++)
	{ model_final(i,j+1) = model(i,j) ; }
	}
	return model_final ;
	}

	Matrix2D<double> test_aggregate(const std::string& file)
	{ Matrix2D<int> seq(file) ;
	size_t nrow = seq.get_nrow() ;
	size_t ncol = seq.get_ncol() ;

	Matrix2D<double> model_final(4, ncol+1, 1e-100) ;

	for(size_t i=0; i<4; i++)
	{ model_final(i,0) = 1. ; }

	std::vector<double> sum(ncol+1, 0.) ;

	for(size_t i=0; i<nrow; i++)
	{ for(size_t j=0; j<ncol; j++)
	{ int base = seq(i,j) ;
	if(base == dna::char_to_int('N'))
	{ continue ; }
	model_final(base, j+1) += 1. ;
	sum[j+1] += 1. ;
	}
	}

	for(size_t i=0; i<model_final.get_nrow(); i++)
	{ for(size_t j=1; j<model_final.get_ncol()+1; j++)
	{ model_final(i,j) /= sum[j] ; }
	}


	return model_final ;
	}

	int main()
	{
	size_t n_class = 2 ;
	size_t n_iter = 50 ;
	- size_t n_shift = 3 ;
	+ size_t n_shift = 1 ;
	bool flip = true ;
	bool bckg_class = false ;
	std::string seed = "" ;
	size_t n_thread = 1 ;

	- /*
	+
	std::string file = "/local/groux/scATAC-seq/data/"
	"10xgenomics_PBMC_5k_motifs/sp1_motifs_10e-7_sequences.mat" ;
	- */

	// std::string file = "/local/groux/scATAC-seq/test.mat" ;
	// std::string file = "/local/groux/scATAC-seq/data/toy_data/simulated_sequences_1class_noflip.mat" ;
	// std::string file = "/local/groux/scATAC-seq/data/toy_data/simulated_sequences_1class_flip.mat" ;
	- std::string file = "/local/groux/scATAC-seq/data/toy_data/simulated_sequences_2class_flip.mat" ;
	+ // std::string file = "/local/groux/scATAC-seq/data/toy_data/simulated_sequences_2class_flip.mat" ;

	/*
	Matrix2D<double> model_final = test_consensus(file,
	n_class,
	n_iter,
	n_shift,
	flip,
	bckg_class,
	seed,
	n_thread) ;
	- */

	+ */
	Matrix2D<double> model_final = test_sequence(file,
	n_class,
	n_iter,
	n_shift,
	flip,
	bckg_class,
	seed,
	n_thread) ;

	+
	/*
	Matrix2D<double> model_final = test_aggregate(file) ;
	*/

	std::cout << model_final << std::endl ;


	/*
	double p0 = 0 ;
	// double l0 = log(p0) ;

	double p1 = 1. - (3.*p0) ;
	// double l1 = log(p1) ;

	// Matrix3D<double> cseq(1, 4, 4) ;
	// cseq(0, 0, 0) = p1 ; cseq(0, 0, 1) = p0 ; cseq(0, 0, 2) = p0 ; cseq(0, 0, 3) = p0 ;
	// cseq(0, 1, 0) = p0 ; cseq(0, 1, 1) = p1 ; cseq(0, 1, 2) = p0 ; cseq(0, 1, 3) = p0 ;
	// cseq(0, 2, 0) = p0 ; cseq(0, 2, 1) = p0 ; cseq(0, 2, 2) = p1 ; cseq(0, 2, 3) = p0 ;
	// cseq(0, 3, 0) = p0 ; cseq(0, 3, 1) = p0 ; cseq(0, 3, 2) = p0 ; cseq(0, 3, 3) = p1 ;

	Matrix2D<int> seq(1, 4, -1) ;
	seq(0, 0) = 0 ; seq(0, 1) = 1 ; seq(0, 2) = 2 ; seq(0, 3) = 3 ;
	Matrix3D<double> cseq = sequence_to_consensus(seq) ;

	std::cout << cseq << std::endl << std::endl ;

	Matrix2D<double> mod(4, 4, -1) ;
	mod(0,0) = p1 ; mod(0,1) = p0 ; mod(0,2) = p0 ; mod(0,3) = p0 ;
	mod(1,0) = p0 ; mod(1,1) = p1 ; mod(1,2) = p0 ; mod(1,3) = p0 ;
	mod(2,0) = p0 ; mod(2,1) = p0 ; mod(2,2) = p1 ; mod(2,3) = p0 ;
	mod(3,0) = p0 ; mod(3,1) = p0 ; mod(3,2) = p0 ; mod(3,3) = p1 ;

	std::cout << mod << std::endl ;

	std::cout << score_consensus_subseq(cseq, 0,0,mod)
	<< std::endl ;
	*/
	return EXIT_SUCCESS ;
	}

No OneTemporaryActions

File Metadata

View Options

Event Timeline

No OneTemporary
Actions