diff --git a/images/ch_atac-seq/data_classCTCF_8class.png b/images/ch_atac-seq/data_classCTCF_8class.png
index 8d8c63a..d224780 100644
Binary files a/images/ch_atac-seq/data_classCTCF_8class.png and b/images/ch_atac-seq/data_classCTCF_8class.png differ
diff --git a/images/ch_atac-seq/data_classPU.1_2class.png b/images/ch_atac-seq/data_classPU.1_2class.png
index 87d3c6b..0b1ad38 100644
Binary files a/images/ch_atac-seq/data_classPU.1_2class.png and b/images/ch_atac-seq/data_classPU.1_2class.png differ
diff --git a/images/ch_atac-seq/data_classjun_3class.png b/images/ch_atac-seq/data_classjun_3class.png
index 8791eb3..943f0fc 100644
Binary files a/images/ch_atac-seq/data_classjun_3class.png and b/images/ch_atac-seq/data_classjun_3class.png differ
diff --git a/images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png b/images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png
index 4d3ca10..eac673a 100644
Binary files a/images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png and b/images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png differ
diff --git a/images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png b/images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png
index 74e72a9..6056e6a 100644
Binary files a/images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png and b/images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png differ
diff --git a/images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png b/images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png
index 860bf77..25a9fa5 100644
Binary files a/images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png and b/images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png differ
diff --git a/images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png b/images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png
index 1265432..c5e7592 100644
Binary files a/images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png and b/images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png differ
diff --git a/images/ch_atac-seq/sp1_motifs_6class_shift_flip.png b/images/ch_atac-seq/sp1_motifs_6class_shift_flip.png
index a939e6a..c99cad0 100644
Binary files a/images/ch_atac-seq/sp1_motifs_6class_shift_flip.png and b/images/ch_atac-seq/sp1_motifs_6class_shift_flip.png differ
diff --git a/images/ch_spark/figure1.pdf b/images/ch_spark/figure1.pdf
deleted file mode 100644
index ce84ff3..0000000
Binary files a/images/ch_spark/figure1.pdf and /dev/null differ
diff --git a/images/ch_spark/figure1.png b/images/ch_spark/figure1.png
new file mode 100644
index 0000000..5cff1c8
Binary files /dev/null and b/images/ch_spark/figure1.png differ
diff --git a/images/ch_spark/supplemental_figure1.pdf b/images/ch_spark/supplemental_figure1.pdf
deleted file mode 100644
index 90d3ab2..0000000
Binary files a/images/ch_spark/supplemental_figure1.pdf and /dev/null differ
diff --git a/images/ch_spark/supplemental_figure1.png b/images/ch_spark/supplemental_figure1.png
new file mode 100644
index 0000000..80436fc
Binary files /dev/null and b/images/ch_spark/supplemental_figure1.png differ
diff --git a/images/ch_spark/supplemental_figure2.pdf b/images/ch_spark/supplemental_figure2.pdf
deleted file mode 100644
index 403a02b..0000000
Binary files a/images/ch_spark/supplemental_figure2.pdf and /dev/null differ
diff --git a/images/ch_spark/supplemental_figure2.png b/images/ch_spark/supplemental_figure2.png
new file mode 100644
index 0000000..c901092
Binary files /dev/null and b/images/ch_spark/supplemental_figure2.png differ
diff --git a/images/ch_spark/supplemental_figure4.pdf b/images/ch_spark/supplemental_figure4.pdf
deleted file mode 100644
index d1fcc93..0000000
Binary files a/images/ch_spark/supplemental_figure4.pdf and /dev/null differ
diff --git a/images/ch_spark/supplemental_figure4.png b/images/ch_spark/supplemental_figure4.png
new file mode 100644
index 0000000..417eb23
Binary files /dev/null and b/images/ch_spark/supplemental_figure4.png differ
diff --git a/images/ch_spark/supplemental_figure5.pdf b/images/ch_spark/supplemental_figure5.pdf
deleted file mode 100644
index 8ced377..0000000
Binary files a/images/ch_spark/supplemental_figure5.pdf and /dev/null differ
diff --git a/images/ch_spark/supplemental_figure5.png b/images/ch_spark/supplemental_figure5.png
new file mode 100644
index 0000000..cca2e5b
Binary files /dev/null and b/images/ch_spark/supplemental_figure5.png differ
diff --git a/images/ch_spark/supplemental_figure8.pdf b/images/ch_spark/supplemental_figure8.pdf
deleted file mode 100644
index af42418..0000000
Binary files a/images/ch_spark/supplemental_figure8.pdf and /dev/null differ
diff --git a/images/ch_spark/supplemental_figure8.png b/images/ch_spark/supplemental_figure8.png
new file mode 100644
index 0000000..d7a4579
Binary files /dev/null and b/images/ch_spark/supplemental_figure8.png differ
diff --git a/main/ch_encode_peaks.aux b/main/ch_encode_peaks.aux
index 2216e96..742412c 100644
--- a/main/ch_encode_peaks.aux
+++ b/main/ch_encode_peaks.aux
@@ -1,160 +1,160 @@
 \relax 
 \providecommand\hyper@newdestlabel[2]{}
 \citation{consortium_integrated_2012}
 \citation{cheng_understanding_2012}
 \citation{cheng_understanding_2012}
 \citation{mathelier_jaspar_2014}
 \citation{kulakovskiy_hocomoco:_2016}
 \citation{jolma_dna-binding_2013}
 \citation{cheng_understanding_2012}
 \citation{mathelier_jaspar_2014}
 \citation{kulakovskiy_hocomoco:_2016}
 \citation{jolma_dna-binding_2013}
 \citation{cheng_understanding_2012}
 \citation{kundaje_ubiquitous_2012}
 \citation{thurman_accessible_2012}
 \citation{gerstein_architecture_2012}
 \@writefile{toc}{\contentsline {chapter}{\numberline {3}ENCODE peaks analysis}{33}{chapter.3}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
 \@writefile{loa}{\addvspace {10\p@ }}
 \newlabel{encode_peaks}{{3}{33}{ENCODE peaks analysis}{chapter.3}{}}
 \@writefile{toc}{\contentsline {chapter}{ENCODE peaks analysis}{33}{chapter.3}}
 \@writefile{toc}{\contentsline {section}{\numberline {3.1}Data}{33}{section.3.1}}
-\@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces \textbf  {Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep  {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000.\relax }}{34}{figure.caption.15}}
-\newlabel{encode_peaks_gm12878_peak_number}{{3.1}{34}{\textbf {Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000.\relax }{figure.caption.15}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces \textbf  {Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep  {mathelier_jaspar_2014}, HOCOMOCO v10 \citep  {kulakovskiy_hocomoco:_2016} or Jolma \citep  {jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep  {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.\relax }}{34}{figure.caption.16}}
-\newlabel{encode_peaks_gm12878_motif_prop}{{3.2}{34}{\textbf {Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.\relax }{figure.caption.16}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces \textbf  {Number of peaks in GM12878} called by ENCODE for each ChIP-seq experiment. The different TFs are colored by type, as defined by \citep  {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000 peaks respectively.\relax }}{34}{figure.caption.15}}
+\newlabel{encode_peaks_gm12878_peak_number}{{3.1}{34}{\textbf {Number of peaks in GM12878} called by ENCODE for each ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000 peaks respectively.\relax }{figure.caption.15}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces \textbf  {Proportion of peaks with a motif in GM12878}, for each ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM modeling the TF binding specificity. Each TF was associated to a log-odd PWM contained either from JASPAR Core vertebrate 2014 \citep  {mathelier_jaspar_2014}, HOCOMOCO v10 \citep  {kulakovskiy_hocomoco:_2016} or Jolma \citep  {jolma_dna-binding_2013} collection. If a motif instance (with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$) could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep  {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.\relax }}{34}{figure.caption.16}}
+\newlabel{encode_peaks_gm12878_motif_prop}{{3.2}{34}{\textbf {Proportion of peaks with a motif in GM12878}, for each ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM modeling the TF binding specificity. Each TF was associated to a log-odd PWM contained either from JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. If a motif instance (with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$) could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.\relax }{figure.caption.16}{}}
 \citation{wu_biogps:_2016}
 \citation{nair_probabilistic_2014}
 \@writefile{toc}{\contentsline {section}{\numberline {3.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{35}{section.3.2}}
 \newlabel{encode_peaks_chippartitioning}{{3.2}{35}{ChIPPartitioning : an algorithm to identify chromatin architectures}{section.3.2}{}}
 \newlabel{encode_peaks_eq_em_data_model}{{3.1}{35}{ChIPPartitioning : an algorithm to identify chromatin architectures}{equation.3.2.1}{}}
 \citation{bailey_fitting_1994}
 \citation{nair_probabilistic_2014}
 \newlabel{encode_peaks_eq_em_update}{{3.2}{36}{ChIPPartitioning : an algorithm to identify chromatin architectures}{equation.3.2.2}{}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {3.2.1}Data realignment}{36}{subsection.3.2.1}}
 \newlabel{encode_peaks_data_realign}{{3.2.1}{36}{Data realignment}{subsection.3.2.1}{}}
 \citation{kundaje_ubiquitous_2012}
 \citation{zhang_canonical_2014}
 \@writefile{toc}{\contentsline {section}{\numberline {3.3}Nucleosome organization around transcription factor binding sites}{37}{section.3.3}}
-\@writefile{lof}{\contentsline {figure}{\numberline {3.3}{\ignorespaces \textbf  {Chromatin pattern around TF binding sites in GM12878 :} \textbf  {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf  {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represent the proportion of the highest signal for each chromatin pattern.\relax }}{38}{figure.caption.17}}
-\newlabel{encode_peaks_array_measure}{{3.3}{38}{\textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represent the proportion of the highest signal for each chromatin pattern.\relax }{figure.caption.17}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {3.3}{\ignorespaces \textbf  {Chromatin pattern around TF binding sites in GM12878 :} \textbf  {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TF binding site using 10bp bins. The TF binding site were then classified into 4 classes according to their nucleosome patterns using ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf  {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represents the proportion of the highest signal for each chromatin pattern.\relax }}{38}{figure.caption.17}}
+\newlabel{encode_peaks_array_measure}{{3.3}{38}{\textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TF binding site using 10bp bins. The TF binding site were then classified into 4 classes according to their nucleosome patterns using ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represents the proportion of the highest signal for each chromatin pattern.\relax }{figure.caption.17}{}}
 \citation{kundaje_ubiquitous_2012,fu_insulator_2008}
-\@writefile{toc}{\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{39}{section.3.4}}
-\newlabel{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}{{3.4}{39}{The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{section.3.4}{}}
 \@writefile{lof}{\contentsline {figure}{\numberline {3.4}{\ignorespaces \textbf  { Colocalization with CTCF peaks in GM12878 cells : } \textbf  {A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). \textbf  {B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. \textbf  {C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. Peaklists are named using the TF together with the laboratory which produced the data.\relax }}{40}{figure.caption.18}}
 \newlabel{encode_peaks_colocalization_ctcf}{{3.4}{40}{\textbf { Colocalization with CTCF peaks in GM12878 cells : } \textbf {A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). \textbf {B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. \textbf {C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. Peaklists are named using the TF together with the laboratory which produced the data.\relax }{figure.caption.18}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {3.5}{\ignorespaces \textbf  {Nucleosome free region at CTCF binding sites} \textbf  {a} The length are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf  {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }}{41}{figure.caption.19}}
-\newlabel{encode_peaks_ctcf_ndr}{{3.5}{41}{\textbf {Nucleosome free region at CTCF binding sites} \textbf {a} The length are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }{figure.caption.19}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {3.5}{\ignorespaces \textbf  {Nucleosome free regions at CTCF binding sites} \textbf  {A} The NDR lengths are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf  {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }}{41}{figure.caption.19}}
+\newlabel{encode_peaks_ctcf_ndr}{{3.5}{41}{\textbf {Nucleosome free regions at CTCF binding sites} \textbf {A} The NDR lengths are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }{figure.caption.19}{}}
 \citation{stedman_cohesins_2008}
 \citation{losada_cohesin_2014}
 \citation{donohoe_identification_2007}
 \citation{bailey_znf143_2015}
+\@writefile{toc}{\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{42}{section.3.4}}
+\newlabel{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}{{3.4}{42}{The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{section.3.4}{}}
 \citation{ong_ctcf:_2014,ghirlando_ctcf:_2016}
 \citation{wang_sequence_2012,neph_expansive_2012,consortium_integrated_2012,guo_high_2012}
 \citation{chatr-aryamontri_biogrid_2017}
 \citation{wang_sequence_2012,neph_expansive_2012,consortium_integrated_2012,guo_high_2012}
 \citation{chatr-aryamontri_biogrid_2017}
 \citation{ghirlando_ctcf:_2016}
 \citation{ong_ctcf:_2014}
 \@writefile{toc}{\contentsline {section}{\numberline {3.5}CTCF and JunD interactomes}{43}{section.3.5}}
 \@writefile{lof}{\contentsline {figure}{\numberline {3.6}{\ignorespaces \textbf  {CTCF motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf  {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf  {B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf  {C} Same as in (B) but for TF binding sites that does not have their own motif. The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref  {encode_peaks_methods_data}).\relax }}{44}{figure.caption.20}}
 \newlabel{encode_peaks_ctcf_association}{{3.6}{44}{\textbf {CTCF motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf {B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif. The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref {encode_peaks_methods_data}).\relax }{figure.caption.20}{}}
 \@writefile{lot}{\contentsline {table}{\numberline {3.1}{\ignorespaces  \textbf  {Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite  {wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep  {chatr-aryamontri_biogrid_2017}.\relax }}{45}{table.caption.21}}
 \newlabel{encode_peaks_association_table}{{3.1}{45}{\textbf {Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite {wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep {chatr-aryamontri_biogrid_2017}.\relax }{table.caption.21}{}}
 \citation{wang_sequence_2012,neph_expansive_2012,consortium_integrated_2012,guo_high_2012,chatr-aryamontri_biogrid_2017}
 \citation{gaffney_controls_2012}
 \citation{gaffney_controls_2012}
 \citation{boller_defining_2018}
 \citation{hagman_early_2005}
 \citation{maier_early_2004,boller_pioneering_2016}
 \@writefile{toc}{\contentsline {section}{\numberline {3.6}EBF1 binds nucleosomes}{47}{section.3.6}}
 \@writefile{lof}{\contentsline {figure}{\numberline {3.7}{\ignorespaces \textbf  {EBF1 binding sites} stand on the edge of a nucleosome. \textbf  {A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep  {gaffney_controls_2012}. \textbf  {B} Dinucleotide frequencies around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. \textbf  {C} Motif frequency around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{48}{figure.caption.22}}
 \newlabel{encode_peaks_ebf1}{{3.7}{48}{\textbf {EBF1 binding sites} stand on the edge of a nucleosome. \textbf {A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep {gaffney_controls_2012}. \textbf {B} Dinucleotide frequencies around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. \textbf {C} Motif frequency around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.22}{}}
 \citation{trifonov_cracking_2011}
 \citation{ioshikhes_variety_2011,trifonov_cracking_2011,gaffney_controls_2012}
 \citation{boller_pioneering_2016}
 \citation{dreos_mga_2018}
 \citation{gerstein_architecture_2012}
 \citation{mathelier_jaspar_2014}
 \citation{kulakovskiy_hocomoco:_2016}
 \citation{jolma_dna-binding_2013}
 \@writefile{toc}{\contentsline {section}{\numberline {3.7}Discussion}{50}{section.3.7}}
 \@writefile{toc}{\contentsline {section}{\numberline {3.8}Methods}{50}{section.3.8}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.1}Data and data processing}{50}{subsection.3.8.1}}
 \newlabel{encode_peaks_methods_data}{{3.8.1}{50}{Data and data processing}{subsection.3.8.1}{}}
 \citation{gaffney_controls_2012}
 \citation{boyle_high-resolution_2008}
 \citation{dreos_eukaryotic_2017}
 \citation{siepel_evolutionarily_2005}
 \citation{ambrosini_chip-seq_2016}
 \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.2}Classification of MNase patterns}{51}{subsection.3.8.2}}
 \newlabel{encode_peaks_em_mnase}{{3.8.2}{51}{Classification of MNase patterns}{subsection.3.8.2}{}}
 \citation{nair_probabilistic_2014}
 \citation{zhang_canonical_2014}
 \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.3}Quantifying nucleosome array intensity from classification results}{52}{subsection.3.8.3}}
 \citation{ambrosini_chip-seq_2016}
 \citation{ambrosini_chip-seq_2016}
 \newlabel{encode_peaks_equation_shift_density1}{{3.3}{53}{Quantifying nucleosome array intensity from classification results}{equation.3.8.3}{}}
 \newlabel{encode_peaks_equation_shift_density2}{{3.4}{53}{Quantifying nucleosome array intensity from classification results}{equation.3.8.4}{}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.4}Peak colocalization}{53}{subsection.3.8.4}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.5}NDR detection}{54}{subsection.3.8.5}}
 \newlabel{encode_peaks_algo_ndr_extend}{{1}{55}{NDR detection}{algocfline.1}{}}
 \@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Searches the coordinates of the NDR using the posterior nucleosome and nucleosome free class probabilities, for a region $R_i$, from its central position.\relax }}{55}{algocf.1}}
 \citation{mathelier_jaspar_2014}
 \citation{kulakovskiy_hocomoco:_2016}
 \citation{ambrosini_pwmscan:_2018}
 \citation{ambrosini_chip-seq_2016-1}
 \citation{ambrosini_chip-seq_2016-1}
 \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.6}CTCF and JunD interactors}{56}{subsection.3.8.6}}
 \citation{ambrosini_chip-seq_2016-1}
 \citation{gaffney_controls_2012}
 \citation{ambrosini_signal_2003}
 \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.7}EBF1 and nucleosome}{57}{subsection.3.8.7}}
 \@setckpt{main/ch_encode_peaks}{
 \setcounter{page}{59}
 \setcounter{equation}{8}
 \setcounter{enumi}{8}
 \setcounter{enumii}{0}
 \setcounter{enumiii}{0}
 \setcounter{enumiv}{0}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{0}
 \setcounter{chapter}{3}
 \setcounter{section}{8}
 \setcounter{subsection}{7}
 \setcounter{subsubsection}{0}
 \setcounter{paragraph}{0}
 \setcounter{subparagraph}{0}
 \setcounter{figure}{7}
 \setcounter{table}{1}
 \setcounter{NAT@ctr}{0}
 \setcounter{FBcaption@count}{0}
 \setcounter{ContinuedFloat}{0}
 \setcounter{KVtest}{0}
 \setcounter{subfigure}{0}
 \setcounter{subfigure@save}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
 \setcounter{subtable@save}{0}
 \setcounter{lotdepth}{1}
 \setcounter{lips@count}{0}
 \setcounter{lstnumber}{1}
 \setcounter{Item}{8}
 \setcounter{Hfootnote}{0}
 \setcounter{bookmark@seq@number}{0}
 \setcounter{AM@survey}{0}
 \setcounter{ttlp@side}{0}
 \setcounter{myparts}{0}
 \setcounter{parentequation}{0}
 \setcounter{AlgoLine}{28}
 \setcounter{algocfline}{1}
 \setcounter{algocfproc}{1}
 \setcounter{algocf}{1}
 \setcounter{float@type}{8}
 \setcounter{nlinenum}{0}
 \setcounter{lstlisting}{0}
 \setcounter{section@level}{0}
 }
diff --git a/main/ch_encode_peaks.tex b/main/ch_encode_peaks.tex
index 7d5e981..b1c07a9 100644
--- a/main/ch_encode_peaks.tex
+++ b/main/ch_encode_peaks.tex
@@ -1,484 +1,484 @@
 \cleardoublepage
 \chapter{ENCODE peaks analysis}
 \label{encode_peaks}
 \markboth{ENCODE peaks analysis}{ENCODE peaks analysis}
 \addcontentsline{toc}{chapter}{ENCODE peaks analysis}
 
 % Modeling a TF sequence specificity only allows to partially understand how a TF binds a region. Indeed, scanning a genome using a PWM for putative binding sites often returns tens of thousands of sites with only a subset of them being really occupied within a cell. Other elements such as chromatin organization and composition are likely to drive TF binding. Thus gaining a better understanding about the chromat
 
 % The exact mechanisms at play remain unclear but nucleosome occupancy is thought to shelter DNA sequence - as some bases are facing the core octamer or to distort the DNA structure - impeding sequence recognition by TFs. In vivo, evidences for competition between TFs and nucleosomes have been collected. Computational simulations accounting for simultaneous multiple factor binding on DNA suggested that nucleosome occupancy and TFs binding influence each other and that TF binds nucleosome depleted regions \cite{wasson_ensemble_2009}.
 
 As discussed in Chapter \ref{intro}, the structure of the chromatin has a deep impact on TF binding. It is now clear that nucleosome occupancy fulfills more than a packaging role. It can also acts as a barrier to impede DNA reading processes and compete with TFs for sequence occupancy. Thus gaining a better understanding of how chromatin is organized around TF binding sites is crucial to understand TF binding beyond their sequence specificity only.
 
 In an effort to better understand how the genome is organized and how its functions are fulfilled, 
 the ENCODE Consortium \citep{consortium_integrated_2012} released an impressive collection of coherent data representing an unprecedented picture of the chromatin in several human cell lines. 
 
 The GM12878 cells were chosen as one of the highest priority cell line. GM12878 retained the ability to divide but show a normal karyotype - unlike HeLa cells. Additionally, their genome has been sequences. All together, these features make of GM12878 cells a good model for genomic studies.
 
 
 \section{Data}
 % number of peaks per dataset
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.3]{images/ch_encode_peaks/peaklist_peaknumber_GM12878.png}  
-	\captionof{figure}{\textbf{Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000.}
+	\captionof{figure}{\textbf{Number of peaks in GM12878} called by ENCODE for each ChIP-seq experiment. The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000 peaks respectively.}
 \label{encode_peaks_gm12878_peak_number}
 \end{center}
 \end{figure}
 
 % proportion of peaks with motif per dataset
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.3]{images/ch_encode_peaks/peaklist_proportions_GM12878.png}  
-	\captionof{figure}{\textbf{Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep{mathelier_jaspar_2014}, HOCOMOCO v10 \citep{kulakovskiy_hocomoco:_2016} or Jolma \citep{jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.}
+	\captionof{figure}{\textbf{Proportion of peaks with a motif in GM12878}, for each ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be near the peak center. Thus the center of each peak was scanned using a PWM modeling the TF binding specificity. Each TF was associated with a log-odds PWM taken from either the JASPAR Core vertebrate 2014 \citep{mathelier_jaspar_2014}, the HOCOMOCO v10 \citep{kulakovskiy_hocomoco:_2016} or the Jolma \citep{jolma_dna-binding_2013} collection. If a motif instance (with a score corresponding to a p-value lower than or equal to $1\cdot10^{-4}$) could be found, the peak was considered as bearing a motif. The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.}
 \label{encode_peaks_gm12878_motif_prop}
 \end{center}
 \end{figure}
 
-During its production phase in 2012, the ENCODE Consortium released ChIP-seq data 53 different TFs, nucleosome occupancy data (MNase-seq, \cite{kundaje_ubiquitous_2012}) and chromatin accessiblity data (DNase-seq, \citep{thurman_accessible_2012}) that were generated with a depth of coverage in GM12878 cells. The ENCODE Consortium also released ChIP-seq peaks called using a uniform processing pipeline \citep{gerstein_architecture_2012}. These peaks account for i) technical variability as they they are called from technical replicates and ii) inter peak caller discrepancies as several peak callers results were integrated together as part of the peak calling pipeline. These peaks are thus reproducible and robust to software related biases and can be considered as an excellent standard. 
+During its production phase in 2012, the ENCODE Consortium released ChIP-seq data for 53 different TFs, nucleosome occupancy data (MNase-seq, \cite{kundaje_ubiquitous_2012}) and chromatin accessibility data (DNase-seq, \citep{thurman_accessible_2012}) that were generated with a high depth of coverage in GM12878 cells. The ENCODE Consortium also released ChIP-seq peaks called using a uniform processing pipeline \citep{gerstein_architecture_2012}. These peaks account for i) technical variability, as they are called from technical replicates, and ii) discrepancies between peak callers, as the results of several peak callers were integrated as part of the peak calling pipeline. These peaks are thus reproducible and robust to software-related biases and can be considered an excellent standard. 
 
 All data were taken from the MGA repository. The ChIP-seq peaks can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Uniform-TFBS/Uniform-TFBS.html}, the MNase-seq data at \url{https://ccg.epfl.ch/mga/hg19/encode/GSE35586/GSE35586.html} and the DNase-seq at \url{https://ccg.epfl.ch/mga/hg19/encode/UW-DNaseI-HS/UW-DNaseI-HS.html}.
 
-The number of peaks called for each TF was highly variable and likely reflects each factor activity in this cell line (Figure \ref{encode_peaks_gm12878_peak_number}). The most abundant factor in terms of peaks was RUNX3 followed by CTCF. This observation fits to BioGPS \citep{wu_biogps:_2016} data which indicates that both RUNX3 and CTCF have a higher expression in lymphoblast and in B cells compared to other tissues. Moreover, the propensity of each TF to bind through their motifs was also variable, with again CTCF being showing the highest values \ref{encode_peaks_gm12878_motif_prop}.
+The number of peaks called for each TF was highly variable and likely reflects each factor's activity in this cell line (Figure \ref{encode_peaks_gm12878_peak_number}). The most abundant factor in terms of peaks was RUNX3, followed by CTCF. This observation is consistent with BioGPS \citep{wu_biogps:_2016} data, which indicate that both RUNX3 and CTCF have a higher expression in lymphoblasts and in B cells compared to other tissues. Moreover, the propensity of each TF to bind through its motif was also variable, with CTCF again showing the highest values (Figure \ref{encode_peaks_gm12878_motif_prop}).
 
 
 \section{ChIPPartitioning : an algorithm to identify chromatin architectures}
 \label{encode_peaks_chippartitioning}
 
 % Discovering archetypical chromatin architectures over a set of regions of interest - let's say containing a TF binding site in their middle - is a long standing problem in bioinformatics. More formerly, given a matrix $R$ of dimensions $NxL$ containing $N$ vectors of read counts $r_{1}, r_{2}, ..., r_{N}$ of length $L$, each containing the number of reads mapping at a given position in a given region, find $K \leq N$ vectors of length $L' \leq L$ that contain archetypical signals found in the $N$ regions of $R$. This can actually be solved using clustering methods which groups regions that look alike into $K$ groups. The summary  of the signal inside each group - for instance the mean signal for the K-means algorithm - can then be interpreted as the archetypical chromatin architectures. Biologically, different organization may reflect different functions.
 
 % First, the $N$ regions of interest are usually aligned with respect to a feature of interest, for instance a TF binding sites. However, he chromatin features of interest - for instance the nucleosomes - may not be aligned from one region to the next. This can originate because i) of the true binding sites being fuzzely distributed around the center of the regions, ii) the chromatin features appear at a varying distance from the region centers or iii) both. Comparing two regions then necessitate to first realign the chromatin features. Second, the regions can show a functional orientation. For instance, TF binding sites have an upstream and a downstream with respect to the bound sequence. Orienting properly the regions is also required to properly compare the chromatin organizations in two regions. Finally, the signal over some regions may be sparse because of a sub-optimal sequencing depth.
 
 % The study of signal distribution over genomic regions has been a quite active field for bulk sequencing experiments during the last decade. Dedicated algorithms \citep{hon_chromasig:_2008,nielsen_catchprofiles:_2012,kundaje_ubiquitous_2012,nair_probabilistic_2014,groux_spar-k:_2019} have been developed to cluster genomic regions based on their distribution of reads.
 
 % Most of these  algorithms and softwares deal with some of these issues cited above. However, the algorithm developed by \citep{nair_probabilistic_2014} - which I will call ChIPPartitioning - is probably the best. ChIPPartitioning is a probabilistic partitioning method that softly clusters a sets of genomic regions based on their signal shape (as opposed to the absolute values) resemblance. To ensure proper comparisons between the regions, the algorithm allows to offset one region compare to the other to retrieve a similar signal at different offsets and to flip the signal orientation. Finally, it has been demonstrated to be really robust to sparse data. 
 
-As discussed in section \ref{intro_pattern_discovery}, pattern discovery is a long standing bioinformatic problem and several algorithms have been proposed to solve it. ChIPPartitioning \citep{nair_probabilistic_2014} is probably the best of them. It is a probabilistic partitioning algorithm that softly clusters a sets of genomic regions based on their signal shape (as opposed to the absolute values) resemblance. To ensure proper comparisons between the regions, the algorithm allows to offset one region compare to the other to retrieve a similar signal at different offsets and to flip the signal orientation. Finally, it has been demonstrated to be really robust to sparse data. 
+As discussed in section \ref{intro_pattern_discovery}, pattern discovery is a long-standing bioinformatics problem and several algorithms have been proposed to solve it. ChIPPartitioning \citep{nair_probabilistic_2014} is probably the best of them. It is a probabilistic partitioning algorithm that softly clusters a set of genomic regions based on the resemblance of their signal shapes (as opposed to the absolute values). To ensure proper comparisons between the regions, the algorithm allows one region to be offset relative to another, in order to retrieve a similar signal at different offsets, and allows the signal orientation to be flipped. Finally, it has been demonstrated to be really robust to sparse data. 
 
-ChIPPartitioning (a graphic representation of the algorithm can be found further below in Figure \ref{atac_seq_em}) models the signal over $N$ region of length $L$ as having being sampled from a mixture of $K$ different signal models (classes), using $L$ independent Poisson distributions for each region. The number of reads sequenced over this region is then the result of this sampling process. Each class model is represented by a vector $c_{k}$ of size $L' \leq L$ that represent the expected number of reads at each position for that class. These values are thus the Poisson distribution parameters. The number of reads $r_{i,j}$ at position $j$, in a region $i$ is :
+ChIPPartitioning (a graphic representation of the algorithm can be found further below in Figure \ref{atac_seq_em}) models the signal over $N$ regions of length $L$ as having been sampled from a mixture of $K$ different read density models (classes), using $L$ independent Poisson distributions for each region. The number of reads sequenced over each region is then the result of this sampling process. Each class model is represented by a vector $c_{k}$ of size $L' \leq L$ that represents the expected number of reads at each position for class $k$. These values are thus the Poisson distribution parameters. The number of reads $r_{i,j}$ at position $j$, in a region $i$ is :
 
 \begin{equation}
 	\label{encode_peaks_eq_em_data_model}
 	r_{i,j} = \sum_{k=1}^{K} p_{k} \times X_{i,j,k}
 \end{equation}
 
-where $p_{k}$ is the probability of the k-th class and $X_{i,j,k}$ the number of reads sampled from $Poisson(\lambda=c_{k,j})$.
+where $p_{k}$ is the probability of class $k$ and $X_{i,j,k}$ is the number of reads sampled from $\mathrm{Poisson}(\lambda=c_{k,j})$.
 
 In order to discover the $K$ different class models - that are the chromatin signatures to find - in the data, the algorithm proceed to a maximum likelihood estimation of the Poisson distribution parameters $c_{1}, c_{2}, ..., c_{k}$ and the class probabilities $p_{1}, p_{2}, ..., p_{k}$ using an expectation-maximization (EM) framework. During the E-step, the likelihood $P(r_{i}|c_{k})$ of each region $i$, given each class $k$ and a posterior probability $P(c_{k}|r_{i})$ are computed. The posterior probabilities are interpreted as the probability that $r_{i}$ belongs to class $k$. Eventually, during the M-step, the class models $c_{1}, c_{2}, ..., c_{k}$ are updated using :
 
 \begin{equation}
 	\label{encode_peaks_eq_em_update}
 	c_{k,j} = \sum_{i=1}^{N} p_{k} \times r_{i,j}
 \end{equation}
 
 This procedure is actually a weighted and ungaped data alignment in which the posterior probabilities are the weights with the class models containing the average number of reads at each position of the alignment.
 
 Since each region is computed a probability to belong to each class, it participates to the update of all the class models, with different weights.
 
-If the length of the chromatin signature searched $L'<L$, then the algorithm slides a window along the regions and searched for this signature at each possible offset. This is how it deals with alignment issue. The signal orientation issue is tackled by also performing a searched with the flipped model.
+If the length of the chromatin signature searched is $L'<L$, then the algorithm slides a window of length $L'=L-S+1$ along the regions, at each possible offset $1,2,...,S$, and searches for this signature at each of these offsets. This is how it deals with the alignment issue. The signal orientation issue is tackled by also performing a search with the flipped model.
 
 At the end of the process, this algorithm returns a posterior probability matrix of dimensions $NxKxSx2$ with $S=L-L'+1$ corresponding to regions, classes, shift states ($S$) and flip states (forward and reverse). In other words, ChIPPartitioning computes a probability of belonging to each class, to each window (in both orientation) in each region.
 
 Because the estimation of the class model parameters is done using an EM framework, ChIPPartitioning is a heuristic algorithm. The final parameter estimates depend on the starting state (which is set randomly) and on the number of iterations run. Finally, it is worth mentioning that this algorithm is close to the MEME algorithm \citep{bailey_fitting_1994} that models DNA sequences as being sampled from a two class mixture model that represents the DNA motif to find and the noise.
 
 Regarding implementation details, ChIPPartitioning was implemented in R programming language, as it was proposed in the supplemental material of \citep{nair_probabilistic_2014}. Nonetheless, ChIPPartitioning turned out to be relatively slow dued to the quite heavy computations it has to carry out (logarithms, exponentials and probability computations), the intrinsic limitations of the R programming language and the lack of optimization in the implementation. 
 
 \subsection{Data realignment}
 \label{encode_peaks_data_realign}
 
 ChIPPartitioning computes a set of posterior probabilities and use them to perform the class model updates. As illustrated in Figure \ref{atac_seq_em}, this procedure is actually a weighted and ungaped data alignment in which the posterior probabilities are the weights.
 
 It is absolutely feasible to run a partitioning on a given matrix $A$, for instance MNase-seq read counts, using ChIPPartitioning, and to subsequently use the obtained posterior probabilities to compute the class models, using another data matrix, let us say $B$ of DNase-seq reads.
 
-This procedure allows to realign a dataset $B$ as $A$ in order to co-visualize different types of signals. The only things that should be taken care of is that matrices $A$ and $B$ should have the same dimensions.
+This procedure allows a dataset $B$ to be realigned in the same way as $A$ in order to co-visualize different types of signals. The only requirements are that matrices $A$ and $B$ have the same dimensions and that the genomic positions inside both matrices are strictly identical.
 
 In the following sections, this is the procedure that will be used to overlay different types of data for a given partition.
 
 
 \section{Nucleosome organization around transcription factor binding sites}
 
 % examples of partitions
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.25]{images/ch_encode_peaks/MNase_profiles.png}  
-	\captionof{figure}{\textbf{Chromatin pattern around TF binding sites in GM12878 :} \textbf{A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display  nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf{B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represent the proportion of the highest signal for each chromatin pattern.}
+	\captionof{figure}{\textbf{Chromatin pattern around TF binding sites in GM12878 :} \textbf{A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TF binding site using 10bp bins. The TF binding sites were then classified into 4 classes according to their nucleosome patterns using ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with given values of shift and flip. To assess the extent to which a given TF i) displays nucleosome arrays on its flanks and ii) has nucleosomes positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf{B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represents the proportion of the highest signal for each chromatin pattern.}
 \label{encode_peaks_array_measure}
 \end{center}
 \end{figure}
 
-For each dataset, the peak coordinates were reassigned to the closest TF motif, if any. However dealing with unaligned signal was still necessary. Indeed, it could not be excluded that the differents TFs would not be the anchor of the chromatin organization around them and have nucleosome arrays at variable distances from their binding sites. Furthermore, dealing with the region orientation was also needed because i) all peaks did not contain a motif indicating the directionality of the binding site (Figure \ref{encode_peaks_gm12878_motif_prop}) and ii) as before, the TF binding site may not be the main driving force of the neighboring chromatin organization. However, this pre-processing step, even if it could not resolve entirely this issue, could at least soften it.
+For each dataset, the peak coordinates were reassigned to the best-scoring TF motif in the peak, if any. However, dealing with unaligned signal was still necessary. Indeed, it could not be excluded that the different TFs would not be the anchor of the chromatin organization around them and would have nucleosome arrays at variable distances from their binding sites. Furthermore, dealing with the region orientation was also needed because i) not all peaks contained a motif indicating the directionality of the binding site (Figure \ref{encode_peaks_gm12878_motif_prop}) and ii) as before, the TF binding site may not be the main driving force of the neighboring chromatin organization. However, this pre-processing step, even if it could not entirely resolve this issue, could at least soften it.
 
-To uncover the different nucleosome architectures around TF binding site, one partition per peaklist based on the MNase-seq signal and using ChIPPartitioning were performed. Because the time required to run the partitioning procedure is long and is a linear function of the number of classes, the choice of four classes was a compromise allowing to discover several chromatin architectures while not being computationally to intense. ChIPPartitioning was also given a freedom of shifting of 15 bins (corresponding to -70bp, -60bp, ..., 0bp, ..., +60bp, +70bp) and of flipping. A visual inspection of the results revealed that all classes, for all TFs, show a nucleosome array on at least one of the side of the TF binding site (examples are displayed in Figures \ref{suppl_encode_peaks_em_ctcf}, \ref{suppl_encode_peaks_em_nrf1}, \ref{suppl_encode_peaks_em_cfos} and \ref{suppl_encode_peaks_em_max}). Additionally, it was also possible to see an increased chromatin accessibility and sequence conservation at the level of the binding site. The enhanced chromatin accessibility is compatible with the current view of TFs binding nucleosome depleted regions \citep{kundaje_ubiquitous_2012}. However, the absence of a footprint like signal is explained by the shifting. By shifting and flipping the regions, ChIPPartitioning realigns the signal over these regions, at the cost of unphasing the binding sites.
+To uncover the different nucleosome architectures around TF binding sites, one partition per peaklist based on the MNase-seq signal was performed using ChIPPartitioning. Because the time required to run the partitioning procedure is long and is a linear function of the number of classes, the choice of four classes was a compromise that allowed the discovery of several chromatin architectures while not being too computationally intensive. ChIPPartitioning was also given a freedom of shifting of 15 bins (corresponding to -70bp, -60bp, ..., 0bp, ..., +60bp, +70bp) and of flipping. A visual inspection of the results revealed that all classes, for all TFs, show a nucleosome array on at least one side of the TF binding site (examples are displayed in Figures \ref{suppl_encode_peaks_em_ctcf}, \ref{suppl_encode_peaks_em_nrf1}, \ref{suppl_encode_peaks_em_cfos} and \ref{suppl_encode_peaks_em_max}). Additionally, it was also possible to see an increased chromatin accessibility and sequence conservation at the level of the binding site. The enhanced chromatin accessibility is compatible with the current view of TFs binding nucleosome depleted regions \citep{kundaje_ubiquitous_2012}. However, the absence of a footprint-like signal is explained by the shifting. By shifting and flipping the regions, ChIPPartitioning realigns the signal over these regions, at the cost of unphasing the binding sites.
 
-A noticeable exception to this rule was the early Early B-cell factor 1 (EBF1) that seemed to had nucleosome arrays spanning its binding sites (Figure \ref{encode_peaks_array_measure}B).
+A noticeable exception to this rule was the Early B-cell factor 1 (EBF1) that seemed to have nucleosome arrays spanning its binding sites (Figure \ref{encode_peaks_array_measure}B).
 
 In order to explore more carefully to what extent nucleosome arrays may be organized with respect to each TF binding sites, I used the mean array density measure developed by \citep{zhang_canonical_2014}. A class pattern showing well positioned nucleosomes is typically showing sharp regions of strong signal separated by signal depleted regions reflecting of the alternance of nucleosome presence/absence. The method developed by Zhang and colleagues basically searches for strong variations of signal. The higher the score, the most the pattern contains well positioned nucleosomes. On the other hand, the ability of a TF to act as an anchor for arrays organization was measured as the standard deviation of the shift used by ChIPPartitioning. Briefly, it is possible to compute the probability density of the usage of each shift state. Assessing how much the different shift states were used is indicative of how much the individual patterns were aligned at the beginning. A low standard deviation value indicates that the shifting tends to be the same for all binding sites and thus that the nucleosome arrays occur at a fixed - unspecified - distance from the binding site. In this case, the binding site could be the array anchor. 
 
-Both values were measured for all classes discovered, for all TFs. The results are displayed in Figure \ref{encode_peaks_array_measure}. First, it was possible to identify a sub-population of classes in which the TF binding site seemed to act as an anchor for the nucleosomes. This represented binding sites for CTCF, RAD21, SMC3, YY1 and ZNF143 (see Figure \ref{encode_peaks_array_measure}A, points 6,8,10,13,14,15,18 and 19). A closer inspection of these class patterns showed a strong DNaseI footprint and a peak of sequence conservation. A DNaseI footprint is a typical pattern - composed of a signal depletion in between two signal enriched regions - revealing a region protected against the action of DNaseI  by the binding of a factor. The presence of a clear footprint indicates that the underlying binding sites were aligned, supporting the fact that the binding sites are anchors for the nucleosome organization. This was further supported by the sharp peak of sequence conservation indicating, most likely reflecting the TF motif. Nonetheless, all other classes showed a wide and fuzzy chromatin accessibility pattern, as illustrated by ATF3 in Figure\ref{encode_peaks_array_measure}B, indicating miss-aligned binding sites.
+Both values were measured for all classes discovered, for all TFs. The results are displayed in Figure \ref{encode_peaks_array_measure}. First, it was possible to identify a sub-population of classes in which the TF binding site seemed to act as an anchor for the nucleosomes. This represented binding sites for CTCF, RAD21, SMC3, YY1 and ZNF143 (see Figure \ref{encode_peaks_array_measure}A, points 6,8,10,13,14,15,18 and 19). A closer inspection of these class patterns showed a strong DNaseI footprint and a peak of sequence conservation. A DNaseI footprint is a typical pattern - composed of a signal depletion in between two signal enriched regions - revealing a region protected against the action of DNaseI by the binding of a factor. The presence of a clear footprint indicated that the underlying binding sites were aligned, supporting the fact that the binding sites were anchors for the nucleosome organization. This was further supported by the sharp peak of sequence conservation indicating, most likely, the TF motif. All other classes showed a wide and fuzzy chromatin accessibility pattern, as illustrated by ATF3 in Figure \ref{encode_peaks_array_measure}B, indicating misaligned binding sites.
 
 Breast cancer type 1 susceptibility protein (BRCA1) was also identified using this method. The identified class (class 3, see Figure \ref{suppl_encode_peaks_em_brca1}) indeed showed well positioned nucleosomes. However, I decided not to consider this hit for two reasons : i) there was not footprint in the nucleosome depleted region indicating that the sites are not aligned and ii) the ENCODE consortium labeled this peak list as problematic (low reproducibility read coverage).
 
-Finally, it should be noted that noisy MNase-seq patterns were attributed  high nucleosome array density scores. Because the nucleosome signal is noisy, it varies a lot and got a good score. Such classes are found in the cloud of points just above the horizontal line on the right of the plot (mostly RNAPIII peak classes). Second, some CTCF binding sites displayed strongly positioned nucleosome, confirming previous reports \citep{kundaje_ubiquitous_2012, fu_insulator_2008}.
+Finally, it should be noted that noisy MNase-seq patterns were attributed high nucleosome array density scores. Because the nucleosome signal is noisy, it varies a lot and gets a good score. Such classes were found in the cloud of points just above the horizontal line on the right of the plot (mostly RNAPIII peak classes). Second, some CTCF binding sites displayed strongly positioned nucleosomes, confirming previous reports \citep{kundaje_ubiquitous_2012, fu_insulator_2008}.
 
-Thus even if all classes showed at least one nucleosome array, it seems that most of the TFs are not the force driving the array organization, with the notable exceptions of CTCF, RAD21, SMC3, YY1 and ZNF143.
+Thus even if all classes showed at least one nucleosome array, it seems that most of the TFs are not the force driving the array organization, with the noticeable exceptions of CTCF, RAD21, SMC3, YY1 and ZNF143.
 
 
 \section{The case of CTCF, RAD21, SMC3, YY1 and ZNF143}
 \label{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.25]{images/ch_encode_peaks/colocalization_ctcf.png}
 	\captionof{figure} {\textbf{ Colocalization with CTCF peaks in GM12878 cells : } \textbf{A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). \textbf{B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. \textbf{C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. Peaklists are named using the TF together with the laboratory which produced the data.}
 \label{encode_peaks_colocalization_ctcf}
 \end{center}
 \end{figure}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.4]{images/ch_encode_peaks/CTCF_ndr_length_rad212.png}
-	\captionof{figure} {\textbf{Nucleosome free region at CTCF binding sites} \textbf{a} The length are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf{B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.}
+	\captionof{figure} {\textbf{Nucleosome free regions at CTCF binding sites} \textbf{A} The NDR lengths are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SMC3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf{B} The proportion of peaks (in green), in each subgroup, having a TSS within 1kb.}
 \label{encode_peaks_ctcf_ndr}
 \end{center}
 \end{figure}
 
 
-Two possible alternative hypotheses can explain the presence of these strong nucleosome arrays around these TFs binding sites. First, each TF has the ability to drive the formation of well spaced nucleosome arrays in their vicinity. Second, all the classes detected contains the same set of genomic regions.
+Two possible alternative hypotheses could explain the presence of these strong nucleosome arrays around these TF binding sites. First, each TF has the ability to drive the formation of well-spaced nucleosome arrays in its vicinity. Second, all the classes detected contain the same set of genomic regions.
 
 Two obsevations strongly support the second hypothesis. First CTCF is known to interact with the cohesin complex \citep{stedman_cohesins_2008} - composed of SMC1, SMC3, RAD21 and either STAG1 or STAG2 \citep{losada_cohesin_2014} -, with YY1  \citep{donohoe_identification_2007} and with ZNF143 \citep{bailey_znf143_2015}. Second, the YY1 and ZNF143 showed \url{~50}\% and \url{~10}\% of direct binding respectively (Figure \ref{encode_peaks_gm12878_motif_prop}), leaving the possibility of an indirect binding mechanism, for instance through CTCF.
 
-To further confirm this hypothesis, I measured the extent to which CTCF and these other TF peaks co-localized. To do so, each RAD21, SMC1, YY1 and ZNF143 peak was checked for the presence of a CTCF peak. The results, shown in Figure\ref{encode_peaks_colocalization_ctcf}A, support the four already known interaction between CTCF and the cohesin complex members RAD21 and SMC3, between CTCF and YY1 and to a lesser extent and between CTCF and ZNF143. Additionally, for YY1 and ZNF143, the presence of CTCF and of a canonical motif happen at separated peak subsets, as shown in Figure \ref{encode_peaks_colocalization_ctcf}B, suggesting two different binding strategies : i) through a direct recognition of the motif or ii) through another mechanism leading to a co-localization with CTCF - most likely through binding to CTCF. 
+To further confirm this hypothesis, I measured the extent to which CTCF and the other TF peaks co-localized. To do so, each RAD21, SMC3, YY1 and ZNF143 peak was checked for the presence of a CTCF peak. The results, shown in Figure \ref{encode_peaks_colocalization_ctcf}A, supported the four already known interactions between CTCF and the cohesin complex members RAD21 and SMC3, between CTCF and YY1 and, to a lesser extent, between CTCF and ZNF143. Additionally, for YY1 and ZNF143, the presence of CTCF and of a canonical motif occur at separate peak subsets, as shown in Figure \ref{encode_peaks_colocalization_ctcf}B, suggesting two different binding strategies : i) through a direct recognition of the motif or ii) through another mechanism leading to a co-localization with CTCF - most likely through binding to CTCF.
 
-Peaks are represented by the maximum read density position, as defined by ENCODE. Thus, the effective binding site of these TF can by anywhere in the peak. As a matter of fact, ZNF143 and YY1 may bind close but without direct interaction with CTCF. If SMC3, RAD21, YY1 or ZNF143 physically interact with CTCF and bind as a complex, one prediction would be that an extended nucleosome depleted region (NDR) should be observed to allow these complexes to bind.
+Peaks are represented by the maximum read density position, as defined by ENCODE. Thus, the effective binding site of these TFs can be anywhere in the peak. As a matter of fact, ZNF143 and YY1 may bind close but without direct interaction with CTCF. If SMC3, RAD21, YY1 or ZNF143 physically interact with CTCF and bind as a complex, one prediction would be that an extended NDR should be observed to allow these complexes to bind.
 
 In order to verify this hypothesis, I set up a classification method that assigns either a "nucleosome" or a "free" label to each position, in a given region based the MNase-seq signal. Assuming that the center of the CTCF peaks is in a NDR, these positions were labeled as 'free' and from there, the neighboring positions on the left and on the right were classified until finding the first position labeled 'nucleosome' (see Figure \ref{suppl_encode_peaks_ctcf_ndr}). The size spanned by the regions labeled as 'free' were then measured for each CTCF binding site. The NDR lengths were finally grouped according to the presence of RAD21, SCM3, YY1 or ZNF143 (Figure \ref{encode_peaks_ctcf_ndr}).
 
 First, it seems that CTCF binding sites are distributed in two functional groups of regions based on the presence of other interactors : i) promoter distant regions with both RAD21 and SMC3 (the cohesin complex), ii) promoters together with YY1 and/or ZNF143. This segregation likely reflects different functions of CTCF : i) looping related functions with the cohesin complex and ii) a regulator of transcription with other partners. The fact that promoter enriched groups show an increased NDR, can be explained by an enhanced chromatin opening to accommodate for the presence of other TFs and of the RNAPII.
 
 Interestingly the subgroups containing the cohesin complex (in orange in Figure \ref{encode_peaks_ctcf_ndr}A) show a NDR length that is function of the number of TFs present (cohesin < cohesin + YY1/ZNF143 < cohesin + YY1 + ZNF143). Because such these sites are away from promoters, it is really likely that the increased NDR size is only caused by the binding of a larger CTCF complex. Furthermore, their reduced NDR size measured is compatible with the classes of binding sites showing strong nucleosome arrays.
 
-Finally, in order to reveal the nucleosome organization around each subset of peaks, I performed a ChIPPartitioning classification method using two classes, with one of them set to represent a flat signal (and to act as a "waste" class). The aim was to make a clear difference between "typical" and "non-typical" nucleosome organizations. For RAD12, SMC3, YY1 and ZNF143 the results showed that strong nucleosome arrays on both sides and a clear DNaseI footprint are only present when a CTCF is also present, as illustrated for YY1 in Figure \ref{encode_peaks_colocalization_ctcf}C.
+Finally, in order to reveal the nucleosome organization around each subset of peaks, I performed a ChIPPartitioning classification method using two classes, with one of them set to represent a flat signal (and to act as a "waste" class). The aim was to make a clear difference between "typical" and "non-typical" nucleosome organizations. For RAD21, SMC3, YY1 and ZNF143 the results showed that strong nucleosome arrays on both sides and a clear DNaseI footprint were only present when CTCF was also present, as illustrated for YY1 in Figure \ref{encode_peaks_colocalization_ctcf}C.
 
-Together, these results support the hypothesis that CTCF forms a complex with YY1 and/or ZNF143, additionally than with the cohesin complex. They also support the  fact that only CTCF has the property of positioning nucleosome into regular arrays in its vicinity and that any other TF showing such a behaviour is likely binding with or near CTCF. As important, the apparent seggregation in terms of regions bounds by the different CTCF complexes is consistent with the hypothesis that the different functions of CTCF depends on its interactors \citep{ong_ctcf:_2014, ghirlando_ctcf:_2016}.
+Together, these results support the hypothesis that CTCF forms a complex with YY1 and/or ZNF143, in addition to the cohesin complex. They also support the fact that only CTCF has the property of positioning nucleosomes into regular arrays in its vicinity and that any other TF showing such a behaviour is likely binding with CTCF. Equally important, the apparent segregation in terms of regions bound by the different CTCF complexes is consistent with the hypothesis that the different functions of CTCF depend on its interactors \citep{ong_ctcf:_2014, ghirlando_ctcf:_2016}.
 
 
 \section{CTCF and JunD interactomes}
 
 % \begin{figure}
 % \begin{center}
 % 	\includegraphics[scale=0.4]{images/ch_encode_peaks/TF_associations.png}  
 % 	\captionof{figure}{\textbf{Possible interaction scenarios between TFs} \textbf{A} Indirect co-binding. The TFs dimerize and bind together on DNA. \textbf{B} Indirect co-binding. Both TF dimerize but only one binds the DNA, the other (the blue) is the tethering factor. \textbf{C} Independent co-binding. Both TF bind in close vicinity but without forming a complex. Both TFs may not be necessarily bound at the same time. \textbf{D} Interference. Both motifs partially or totally overlap each other. Whether only one TF or both can bind at the same time is unknown.}
 % \label{encode_peaks_tf_association}
 % \end{center}
 % \end{figure}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.4]{images/ch_encode_peaks/ctcf_motif_association.png}  
 	\captionof{figure}{\textbf{CTCF motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf{A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf{B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf{C} Same as in (B) but for TF binding sites that does not have their own motif. The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref{encode_peaks_methods_data}).}
 \label{encode_peaks_ctcf_association}
 \end{center}
 \end{figure}
 
 \begin{table}
 \begin{center}
 	\begin{tabular}{ |c|c|c|l|l|c|c| }
   	\hline
   	\multicolumn{7}{|c|}{Curated associations} \\
   	\hline
   	TF$_{A}$ & TF$_{B}$ & Motif ass. & Type  & Binder & Reported & Validated \\
   	\hline
   	CTCF & ATF2   & pos & indep.co-bind &       & no  & no \\
   	CTCF & EBF1   & pos & indep.co-bind &       & yes & no \\
   	CTCF & MAZ    & pos & indep.co-bind &       & yes & no \\
   	CTCF & NFYb   & pos & indep.co-bind &       & yes & no \\
   	CTCF & NFkB   & pos & indep.co-bind &       & yes & no \\
   	CTCF & PAX5   & pos & indep.co-bind &       & yes & no \\
   	CTCF & SP1    & pos & indep.co-bind &       & yes & no \\
   	CTCF & BATF   & neg & indir.co-bind & BATF  & yes & no \\
   	CTCF & ELF1   & neg & indir.co-bind & ELF1  & yes & no \\
   	CTCF & IRF4   & neg & indir.co-bind & CTCF  & yes & no \\
   	CTCF & MEF2a  & neg & indir.co-bind & both  & yes & no \\
   	CTCF & MEF2c  & neg & indir.co-bind & both  & yes & no \\
   	CTCF & NFATc  & neg & indir.co-bind & CTCF  & no  & no \\
   	CTCF & NFYa   & neg & indir.co-bind & CTCF  & yes & no \\
   	CTCF & NRF1   & neg & indir.co-bind & CTCF  & yes & no \\
   	CTCF & NRSF   & neg & indir.co-bind & CTCF  & yes & no \\
   	CTCF & PAX5   & neg & indir.co-bind & both  & yes & no \\
   	CTCF & POU2f  & neg & indir.co-bind & POU2f & yes & no \\
   	CTCF & RUNX3  & neg & indir.co-bind & both  & no  & no \\
   	CTCF & SRF    & neg & indir.co-bind & CTCF  & yes & no \\  
   	CTCF & USF1   & neg & indir.co-bind & both  & yes & no \\  
   	CTCF & YY1    & neg & indir.co-bind & CTCF  & yes & yes\\  
   	CTCF & ZNF143 & neg & indir.co-bind & CTCF  & yes & no \\
   	\hline
   	JunD & BHLHE40 & neg & indir.co-bind & BHLHE40 & yes & no \\
   	JunD & CTCF    & neg & indir.co-bind & CTCF    & yes & no \\
   	JunD & EBF1    & neg & indir.co-bind & EBF1    & yes & no \\
   	JunD & EGR1    & neg & indir.co-bind & EGR1    & yes & yes\\
   	JunD & ELK1    & neg & unknown       &         & no  & no \\
   	JunD & IRF4    & neg & indir.co-bind & JunD    & yes & yes\\
   	JunD & MAZ     & neg & indir.co-bind & MAZ     & no  & no \\
   	JunD & PAX5    & neg & indir.co-bind & PAX5    & yes & no \\
   	JunD & SP1     & neg & indir.co-bind & SP1     & yes & yes\\
   	JunD & USF2    & neg & indir.co-bind & USF2    & yes & no \\
   	JunD & YY1     & neg & indir.co-bind &         & yes & yes\\
   	JunD & ZBTB33  & neg & unknown       &         & yes & no \\    
   	\hline
 	\end{tabular}
         
 	\captionof{table} { \textbf{Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep{chatr-aryamontri_biogrid_2017}.}
 \label{encode_peaks_association_table}
 \end{center}
 \end{table}
 
 
 The study of co-binding with CTCF showed that it was possible to detect global associations. I already detected that the cohesin complex members SMC3 and RAD21 form a complex with CTCF, as expected from literature \citep{ghirlando_ctcf:_2016}. Additionally, I detected that YY1 and ZNF143  are also frequently associated with CTCF, which has also been reported \citep{ong_ctcf:_2014}.
 
 Thus, I decided to push forward in this direction. To this end, I set up a method based on motif co-occurrence to i) relieve the necessity of observing similar chromatin architectures, as in the previous section and ii) be able to functionally characterize the detected interactions. 
 
-As previously discussed (see section \ref{intro_tf_cobinding}), several types of functional interactions between two TFs $A$ and $B$ exist : direct co-binding, indirect co-binding, independent co-binding and interference. Because the binding mechanisms are different from each other, different observations are expected. In the case of direct co-binding, both TF motifs are expected to appear in close vicinity, more often than by chance. Moreover, a spatial constrain (both spacing and orientation) reflecting the complex structure is also expected to occur. In the case of indirect co-binding, if TF$_{A}$ is the factor binding its motif and TF$_{B}$ is the tethering factor, both motifs are expected to repel (avoid) each other at TF$_{A}$ binding sites. In the case of independent co-binding, both motif$_{A}$ and motif$_{B}$ are expected to be enriched at both TF$_{A}$ and TF$_{B}$ binding sites. However, no spatial constrain is expected between the motifs. Finally, in the case of interference, both motifs are expected to overlap. However, this may not be difficult to detect.
+As previously discussed (see section \ref{intro_tf_cobinding}), several types of functional interactions between two TFs $A$ and $B$ exist : direct co-binding, indirect co-binding, independent co-binding and interference. Because the binding mechanisms are different from each other, different observations are expected. In the case of direct co-binding, both TF motifs are expected to appear in close vicinity, more often than by chance. Moreover, a spatial constraint (both spacing and orientation) reflecting the complex structure is also expected to occur. In the case of indirect co-binding, if TF$_{A}$ is the factor binding its motif and TF$_{B}$ is the tethering factor, both motifs are expected to repel (avoid) each other at TF$_{A}$ binding sites. In the case of independent co-binding, both motif$_{A}$ and motif$_{B}$ are expected to be enriched at both TF$_{A}$ and TF$_{B}$ binding sites. However, no spatial constraint is expected between the motifs. Finally, in the case of interference, both motifs are expected to overlap. However, this may be difficult to detect.
 
 % Several types of functional associations can occur between a TF$_{A}$ and a TF$_{B}$. Because each one of them brings different expected patterns in the data, it should be possible to detect and disentangle them. First two TFs can dimerize and bind to DNA using both DNA binding domains (DBDs) [REFERENCE NEEDED] (Figure \ref{encode_peaks_tf_association}A). I will refer to this case as \textbf{direct co-binding}. If this happens, both TF motifs are expected to appear in close vicinity, more often than by chance. Moreover, a spatial constrain (both spacing and orientation) reflecting the complex structure is also expected to occur. Second, two TFs can dimerize and bind to DNA using only one of the DBDs. This will result in having one of the TF bound to DNA while the other one will tether DNA through its interaction with the other TF (Figure \ref{encode_peaks_tf_association}B). This case will be referred to as \textbf{indirect co-binding}. In such a case, if TF$_{A}$ is the factor binding its motif and TF$_{B}$ is the tethering factor, both motifs are expected to repel (avoid) each other at TF$_{A}$ binding sites. Third, two TFs can both bind DNA using their own DBDs, in close vicinity but without any physical interaction (Figure \ref{encode_peaks_tf_association}C). In such as case, both motif$_{A}$ and motif$_{B}$ are expected to be enriched at both TF$_{A}$ and TF$_{B}$ binding sites. However, no spatial constrain is expected between the motifs. This case will be refered to as \textbf{independent co-binding}. This can be caused by a temporal relationship between both TFs where both TFs can bind to a given region asynchronously. For instance, a first TF is recruited to its binding site and ensures - somehow - a proper chromatin environment for another TF, such as illustrated during macrophage and B cells progenitors commitment \citep{heinz_simple_2010}. 
Finally, in case of a partial or total motif overlap, both TFs may be observed to be bound together (Figure \ref{encode_peaks_tf_association}D). In such a case, different phenomenons may explain this observation. A first possible explanation would be that two TFs compete to bind to the same region. Observing both TFs bound together could be due to an overlap of data from different cells in which only one TF is bound at the time. A second possible explanation would be that, for some reason, only one TF is bound, never the other. However, I prefer to be cautious regarding the causal mechanisms and this case will be referred to as an \textbf{interference}.
 
-In order to collect more evidences about functional connections between TFs, I developed a simple analysis pipeline able to detect the expected patterns of motifs described above. Briefly, given a set of binding sites for a TF$_{A}$, it is possible to construct a contingency matrix containing the number of binding site with i) motif$_{A}$ and motif$_{B}$, ii) motif$_{A}$ only, iii) motif$_{B}$ only or iv) no motif and assess whether both motifs are associated or avoid each other using an exact Fisher test. Then, for pairs of motifs showing an association, displaying the spatial distribution of the motif may help to discriminate whether or not there is a spacing constrain or a motif overlap.
+In order to collect more evidence about functional connections between TFs, I developed a simple analysis pipeline able to detect the expected patterns of motifs described above. Briefly, given a set of binding sites for a TF$_{A}$, it is possible to construct a contingency matrix containing the number of binding sites with i) motif$_{A}$ and motif$_{B}$, ii) motif$_{A}$ only, iii) motif$_{B}$ only or iv) no motif and assess whether both motifs are associated or avoid each other using an exact Fisher test. Then, for pairs of motifs showing an association, displaying the spatial distribution of the motif may help to discriminate whether or not there is a spacing constraint or a motif overlap.
 
-I investigated the association of 47 TFs for which 53 datasetes were available in GM12878 cells with CTCF or JunD. CTCF was chosen because i) most of its binding sites have a short nucleosome depleted region and show only a peak of sequence conservation at the binding site leaving a restricted space for other motifs to co-occur (Figure \ref{suppl_encode_peaks_em_ctcf}) and ii) I already collected several observation regarding CTCF. JunD was chosen as a complementary example to CTCF in the sense that i) contrarily to CTCF, it is only a trancriptional regulator, ii) it is expected to bind to regulatory regions mostly thus to open chromatin regions where other motifs are expected to co-occur , iii) \url{~50}\% of the peaks have a motif versus \url{~80}\% to \url{~90}\% for CTCF peaklists (Figure \ref{encode_peaks_gm12878_motif_prop}).
+I investigated the association of 47 TFs for which 53 datasets were available in GM12878 cells with CTCF or JunD. CTCF was chosen because i) most of its binding sites have a short nucleosome depleted region and show a sharp peak of sequence conservation at the binding site leaving a restricted space for other motifs to co-occur (Figure \ref{suppl_encode_peaks_em_ctcf}) and ii) I already collected several observations regarding CTCF. JunD was chosen as a complementary example to CTCF in the sense that i) contrary to CTCF, it is only a transcriptional regulator, ii) it is expected to bind to regulatory regions mostly, thus to open chromatin regions where other motifs are expected to be present, iii) \url{~50}\% of the peaks have a motif versus \url{~80}\% to \url{~90}\% for CTCF peaklists (Figure \ref{encode_peaks_gm12878_motif_prop}).
 
 % motif co occurence
-Motif co-occurrence analysis suggested several interactions. Regarding CTCF motif (Figure \ref{encode_peaks_ctcf_association}A), 8 positive motif association (ATF2, EBF1, MAZ, NFYb, NFkB, PAX5, SP1, YY1) and 16 negative motif associations (BATF, ELF1, IRF4, MEF2a, MEF2c, NFATc, NFYa, NRF1, NRSF/REST, PAX5, POU2F2/OCT2, RUNX3, SRF, USF1, YY1 and ZNF143) with other motifs were found. Regarding JunD (Figure \ref{suppl_encode_peaks_jund_association}A), positive motif association with 2 others TF motifs (BATF, cFos) and 12 negative associations with others TF motifs (ATF2, BHLHE40, CTCF, EBF1, EGR1, ELK1, IRF4, MAZ, PAX5, SP1, USF2, YY1 and ZBTB33) were found. cFos and one of the YY1-Sydh peaklists displayed evidences of poor quality (not shown and annotated as such by the ENCODE Consortium). Additionally, ATF2 is an AP1 member which possess a 2bp spacer (TGANNTCA) while JunD is a 1bp motif  space (TGANTCA). Thus the strong negative interaction may simply be due to the fact that both motifs are simply mutually exclusive. In consequence, the positive associations CTCF-YY1 and JunD-cFos and the negative association JunD-ATF2 should be ignored. Additionally, JunD and BATF motifs are the same as both these TFs belong to the AP1 family. In consequence, it is impossible to say whether BATF peaks harbour a JunD or a BATF site. Thus this association should be ignored as well, leaving no positive association left with JunD motif.
+The motif co-occurrence analysis suggested several interactions. Regarding the CTCF motif (Figure \ref{encode_peaks_ctcf_association}A), 8 positive motif associations (ATF2, EBF1, MAZ, NFYb, NFkB, PAX5, SP1, YY1) and 16 negative motif associations (BATF, ELF1, IRF4, MEF2a, MEF2c, NFATc, NFYa, NRF1, NRSF/REST, PAX5, POU2F2/OCT2, RUNX3, SRF, USF1, YY1 and ZNF143) were found. Regarding JunD (Figure \ref{suppl_encode_peaks_jund_association}A), positive motif associations with 2 other TF motifs (BATF, cFos) and 13 negative associations with other TF motifs (ATF2, BHLHE40, CTCF, EBF1, EGR1, ELK1, IRF4, MAZ, PAX5, SP1, USF2, YY1 and ZBTB33) were found. cFos and one of the YY1-Sydh peaklists displayed evidence of poor quality (not shown and annotated as such by the ENCODE Consortium). Additionally, ATF2 belongs to the members of the AP1 family that have a 2bp spacer (TGANNTCA) while JunD has a 1bp spacer (TGANTCA). Thus the strong negative interaction may simply be due to the fact that both motifs are mutually exclusive. In consequence, the positive associations CTCF-YY1 and JunD-cFos and the negative association JunD-ATF2 should be ignored. Additionally, JunD and BATF motifs are the same as both these TFs belong to the AP1 family. In consequence, it is impossible to say whether BATF peaks harbour a JunD or a BATF site. Thus this association should be ignored as well, leaving no positive association left with the JunD motif.
 
 % densities
-The analysis of CTCF and JunD motif occurrence densities (Figures \ref{encode_peaks_ctcf_association}B and C and Figure \ref{suppl_encode_peaks_jund_association}B and C) revealed further interesting details regarding possible association mechanisms. First, positive associations showed CTCF density patterns mostly compatible with the direct co-binding and the independent co-binding scenarios (see Figure \ref{encode_peaks_ctcf_association}B). However, making a clear distinction between both is often impossible. For instance, both EBF1 peaklists show a decrease in CTCF motif density \url{~10}bp after the peak followed by an increase which could represent the spacer between CTCF and EBF1. However this is followed by a rather wide CTCF motif presence, mostly suggesting an independent co-binding scenario. An interesting candidate for a direct co-binding with CTCF is RXRa (Figure \ref{encode_peaks_ctcf_association}B). Even though the motif association was not significant, a focused co-localization of both motif appears. Second, negative associations showed CTCF and JunD density patterns compatible with the indirect co-binding scenario where the TFs would tether through CTFC or JunD, i.e. the CTCF or JunD motifs do not show a spacing constrain with the binding sites but are rather spread over ~100bp around binding sites without their own motif (Figure \ref{encode_peaks_ctcf_association}C and Figure \ref{suppl_encode_peaks_jund_association}C). Interestingly, CTCF motif around YY1 and ZNF143 binding sites lacking their own motifs (see bottom of Figure \ref{encode_peaks_ctcf_association}C) showed really focused densities, indicating that for some reason, the CTCF motif is well localized. Even if unexpected, this observation is not incompatible with the indirect co-binding scenario and further supports the results from section \ref{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}.
+The analysis of CTCF and JunD motif occurrence densities (Figures \ref{encode_peaks_ctcf_association}B and C and Figure \ref{suppl_encode_peaks_jund_association}B and C) revealed further interesting details regarding possible association mechanisms. First, positive associations showed CTCF density patterns mostly compatible with the direct co-binding and the independent co-binding scenarios (see Figure \ref{encode_peaks_ctcf_association}B). However, making a clear distinction between both is often really difficult. For instance, both EBF1 peaklists showed a decrease in CTCF motif density \url{~10}bp after the peak, followed by an increase which could represent the spacer between CTCF and EBF1. However this is followed by a rather wide CTCF motif presence, mostly suggesting an independent co-binding scenario. An interesting candidate for a direct co-binding with CTCF is RXRa (Figure \ref{encode_peaks_ctcf_association}B). Even though the motif association was not significant, a focused co-localization of both motifs appears. Second, negative associations showed CTCF and JunD density patterns compatible with the indirect co-binding scenario where the TFs would tether through CTCF or JunD, i.e. the CTCF or JunD motifs do not show a spacing constraint with the binding sites but are rather spread over \url{~100}bp around binding sites without their own motif (Figure \ref{encode_peaks_ctcf_association}C and Figure \ref{suppl_encode_peaks_jund_association}C). Interestingly, CTCF motif around YY1 and ZNF143 binding sites lacking their own motifs (see bottom of Figure \ref{encode_peaks_ctcf_association}C) showed really focused densities, indicating that for some reason, the CTCF motif is well localized. Even if unexpected, this observation is not incompatible with the indirect co-binding scenario and further supports the results from section \ref{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}.
 
 % results
-To summarize, the motif association statistics allowed me to identify 35 associations of TFs with either CTCF or JunD (Table \ref{encode_peaks_association_table}). The strongest negative interactions for CTCF were ZNF143 and YY1, supporting the results found in the previous sections. The analysis of CTCF and JunD motif spatial distributions around peaks and a closer examination of the contingency matrices allowed to suggest details about the interacting mechanisms, including which TF binds DNA. The only two exceptions were JunD-ELK1 and JunD-ZBTB33 for which the motif occurrence densities were uninformative. Finally, out of these 35 associations, 5 were supported by experimental evidences and 5 were not already reported in previous studies or databases \citep{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012, chatr-aryamontri_biogrid_2017}.
+To summarize, the motif association statistics allowed me to identify 35 associations of TFs with either CTCF or JunD (Table \ref{encode_peaks_association_table}). The strongest negative interactions for CTCF were ZNF143 and YY1, supporting the results found in the previous sections. The analysis of CTCF and JunD motif spatial distributions around peaks and a closer examination of the contingency matrices allowed me to suggest details about the interacting mechanisms, including which TF binds DNA. The only two exceptions were JunD-ELK1 and JunD-ZBTB33 for which the motif occurrence densities were uninformative. Finally, out of these 35 associations, 5 were supported by experimental evidence and 5 had not been reported in previous studies or databases \citep{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012, chatr-aryamontri_biogrid_2017}.
 
 
 \section{EBF1 binds nucleosomes}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.4]{images/ch_encode_peaks/ebf1_haib_1.png}  
 	\captionof{figure}{\textbf{EBF1 binding sites} stand on the edge of a nucleosome. \textbf{A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep{gaffney_controls_2012}. \textbf{B} Dinucleotide frequencies around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. \textbf{C} Motif frequency around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.}
 \label{encode_peaks_ebf1}
 \end{center}
 \end{figure}
 
 % As presented above (section \ref{encode_peaks_chippartitioning}), EBF1 binding sites does not seem to present a NDR seem to be covered by a nucleosome array. This observation suggest that EBF1 can bind to nucleosomal DNA. However, because ChIPPartitioning realigns the data, one possible explanation is that it failed to properly aligned the data and that the results do not reflect reality.
 
 % In order to clarify this, I looked at the MNase digestion profile - more specifically, at the distribution of nucleosome dyads - at EBF1 binding sites.
 
-EBF1 is a crucial factor for B cell development. It is necessary in the early steps, for a proper lineage commitment as well as later on during the entire B cell development \citep{boller_defining_2018}. Since many years, EBF1 has been though to be able to "pioneer early changes in the target gene chromatin necessary for transcriptional activation" and proper B cell development \citep{hagman_early_2005}. Experimental evidences supported that EBF1 could be able to bind compacted naive chromatin (without noticeable mark/modification), leading to a local chromatin opening, H3K4me2 deposition, DNA demethylation and gene activation \citep{maier_early_2004,boller_pioneering_2016}. If such features makes a lot of sense during lineage commitment, the some underlying mechanisms remained mysterious, especially how EBF1 primarily binds to closed chromatin. With regard to this, the results of section \ref{encode_peaks_chippartitioning}, suggesting that EBF1 binding sites may be covered by nucleosome arrays, rose my attention. In order to collect evidences that may shed light on this, I conducted a deeper exploration of the EBF1 binding sites.
+EBF1 is a crucial factor for B cell development. It is necessary in the early steps, for a proper lineage commitment as well as later on during the entire B cell development \citep{boller_defining_2018}. For many years, EBF1 has been thought to be able to "pioneer early changes in the target gene chromatin necessary for transcriptional activation" and proper B cell development \citep{hagman_early_2005}. Experimental evidence supported that EBF1 could be able to bind compacted naive chromatin (without noticeable mark/modification), leading to a local chromatin opening, H3K4me2 deposition, DNA demethylation and gene activation \citep{maier_early_2004,boller_pioneering_2016}. If such features make a lot of sense during lineage commitment, some of the underlying mechanisms remained mysterious, especially how EBF1 primarily binds to closed chromatin. With regard to this, the results of section \ref{encode_peaks_chippartitioning}, suggesting that EBF1 binding sites may be covered by nucleosome arrays, caught my attention. In order to collect evidence that may shed light on this, I conducted a deeper exploration of the EBF1 binding sites.
 
-First, the distribution of nucleosome dyads - from two independent experiments - around EBF1 binding sites revealed a landscape that is compatible with a nucleosome positioned ~70bp apart from the binding sites (Figures \ref{encode_peaks_ebf1}A). This configuration would position the EBF1 binding site at the edge of the nucleosome. The 10bp periodicity visible suggested that other positioning of the EBF1 binding site exist but always at integer numbers of helix turn, such that the EBF1 binding site would always be positioned the same compared to the nucleosome surface. Surprisingly, the distribution of EBF1 motif remained the same, whether the nucleosome was containing an EBF1 bound site or not (Figure \ref{suppl_encode_peaks_ebf1_nucl}).
+First, the distribution of nucleosome dyads - from two independent experiments - around EBF1 binding sites revealed a landscape that is compatible with a nucleosome positioned $\sim$70bp apart from the binding sites (Figure \ref{encode_peaks_ebf1}A). This configuration would position the EBF1 binding sites at the edge of the nucleosome. The visible 10bp periodicity suggested that other positionings of the EBF1 binding site exist, but always at integer numbers of helix turns, such that the EBF1 binding site would always be positioned the same compared to the nucleosome surface. Surprisingly, the distribution of the EBF1 motif remained the same, whether the nucleosome contained an EBF1 bound site or not (Figure \ref{suppl_encode_peaks_ebf1_nucl}).
 
 Second, to support the fact that these EBF1 binding sites are indeed functional sites, I compared some of their chromatin features with the entire nucleosome pool. As expected, the presence of EBF1 binding sites was correlated with an increased accessibility (Figure \ref{suppl_encode_peaks_ebf1_chrom}A), even though the opening was spread rather than narrow. Furthermore, this increased opening was concomitant with an enriched H3K4me2 deposition (Figure \ref{suppl_encode_peaks_ebf1_chrom}B), in line with the literature. Last, it was also possible to highlight a higher sequence conservation at the nucleosome edges when they had an EBF1 binding site (Figure \ref{suppl_encode_peaks_ebf1_chrom}C), suggesting a functional difference between both nucleosome pools.
 
 % Finally, Trifonov's motif appeared along the nucleosome, EBF1 motif was  rather present at the nucleosome edges. A closer look both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif ({A/C}CCC{A/C} and {A/G}GGG{A/G}) at the cost of 2 or 0 missmatches.
 
 % A further inspection of the dinucleotide base composition in the nucleosome bearing an EBF1 binding site revealed a periodic pattern that is compatible with a rotationally positioned nucleosome (Figure \ref{encode_peaks_ebf1}B), as expected from literature in \citep{ioshikhes_variety_2011,gaffney_controls_2012}.
 
 % Finally, the occurrence of the nucleosome positioning motif - YRRRRRYYYYYR where Y is C/T and R is A/G - identified by Trifonov \citep{trifonov_cracking_2011} in these nucleosomes is antiphased with the occurrence of the EBF1 motif. If Trifonov's motif appeared along the nucleosome, EBF1 motif was  rather present at the nucleosome edges. A closer look both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif ({A/C}CCC{A/C} and {A/G}GGG{A/G}) at the cost of 2 or 0 missmatches.
 
 % These results suggest that EBF1 can bind nucleosomal DNA. In most cases, it seems that the EBF1 binding site is located at its edge. Incidentally, the high similarity between Trifonov and EBF1 motifs suggest that EBF1 binding sequence may have a nucleosome positioning property. Interestingly, EBF1 motif, as identified by JASPAR \ref{suppl_encode_peaks_ebf1_logo}, is 14bp wide. Consequently, it is conceivable that, wherever this motif is located along the nucleosome, at least part of remains facing outward and is thus "readable".
 
 % Based on this observation, I hypothesize that EBF1 may be a pioneering factor or that it influence nucleosomes positioning through its binding. In the first case, EBF1 would be able to target yet inaccessible loci upon the right cellular conditions. In the second case, EBF1 would rather serve to both open and close targeted sites by leading - directly or indirectly - to the positing of a nucleosome right beside of it binding site. Both scenarios make sense. Indeed, EBF1 is known to be crucial for B-cells commitment. In such developmental processes, specific enhancers are made accessible and active at different, in a coordinated manner, during the developmental process. (AND WHAT ABOUT CLOSING???)
 
 Third, a further inspection of the sequence composition of the nucleosomes bearing an EBF1 binding site revealed i) a periodic occurrence of antiphased WW (W=A/T) and SS (S=C/G) dinucleotides and ii) a periodic occurrence of the YRRRRRYYYYYR (R=A/G, Y=C/T) nucleosome positioning motif described by Trifonov \citep{trifonov_cracking_2011}. Together, these observations suggest that EBF1 binding sites are located on the edge of a rotationally positioned nucleosome \citep{ioshikhes_variety_2011,trifonov_cracking_2011,gaffney_controls_2012}. Interestingly, Trifonov's motif appeared in counter phase with the EBF1 motif. A closer look at both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif (\{A/C\}CCC\{A/C\} or \{A/G\}GGG\{A/G\}) at the cost of 2 or 0 mismatches.
 
-These results suggest that EBF1 can indeed bind nucleosomal DNA. The motif bound were predominantly located at the edges of the nucleosomes. Yet, this was also the fact for nucleosome that do are not bind by EBF1. This suggests that nucleosomes are already in this position before EBF1 binding, which may be the case given the presence of favorable nucleosome positioning sequences.
+These results suggest that EBF1 can indeed bind nucleosomal DNA. The motifs bound were predominantly located at the edges of the nucleosomes. Yet, this was also the case for nucleosomes that are not bound by EBF1. This suggests that nucleosomes are already in this position before EBF1 binding, which may be the case given the presence of favorable nucleosome positioning sequences.
 
-The reason why the EBF1 motif is already on the edges of nucleosome, even without EBF1 binding, remains unknown. One explanation could be that such sites have a double function. The first function would be to recruit EBF1 to open up the region. The second, would be that EBF1 binding sequence (together with other positioning sequences) can act as a barrier - a potential well - avoiding the nucleosome to roll over in this direction. Such a system would have the advantage of promoting a suited chromatin structure in developmentally important regions. Constraining nucleosome movement would could serve to hide regulatory elements. At the same time, these regions would remain responsive to differentiation signals through the exposition of EBF1 sites on the periphery of nucleosomes.
+The reason why the EBF1 motif is already on the edges of nucleosomes, even without EBF1 binding, remains unknown. One explanation could be that such sites have a double function. The first function would be to recruit EBF1 to open up the region. The second would be that the EBF1 binding sequence (together with other positioning sequences) can act as a barrier - a potential well - preventing the nucleosome from rolling over in this direction. Such a system would have the advantage of promoting a suitable chromatin structure in developmentally important regions. Constraining nucleosome movement could serve to hide regulatory elements. At the same time, these regions would remain responsive to differentiation signals through the exposure of EBF1 sites on the periphery of nucleosomes.
 
  
 \section{Discussion}
 
-Overall, the results presented in this section overall complement and support the observations made by other research groups worldwide.
+Overall, the results presented in this section complement and support the observations made by other research groups worldwide.
 
 % nucleosome arrays and NDR
 The systematic study of the nucleosome landscape in the vicinity of TF binding sites highlighted that nucleosome arrays are always present on the flanking regions. However, all the TFs, with the exception of CTCF, do not act as a barrier and thus are not major determinants of the chromatin architecture. Instead, an alternative mechanism, probably involving chromatin remodelers, is likely to be responsible. Furthermore, all TFs were found to bind in NDRs with the noticeable exception of EBF1.
 
 % EBF1
-Surprisingly, a large fraction of EBF1 binding sites was found to be occupied by what seemed to be a rotationally positioned nucleosome which edges are bound by EBF1. Furthermore, it appeared that EBF1 binding motif resembles a nucleosome positioning sequence and could be involved in the positioning of the nucleosome.  However, at least two alternative scenarios could explain the presence of an EBF1 binding site at the entry of a nucleosome. First, EBF1 genuinely binds to such "pre-positioned" nucleosomes, in which case I am observing EBF1 true binding mechanism. Alternatively, EBF1 binding - to either nucleosomal or naked DNA - results in the positioning of a nucleosome right beside. To my opinion, the previous results suggesting a pioneer function for EBF1 \citep{boller_pioneering_2016} makes the second hypothesis more likely. EBF1 would directly engage a nucleosome and somehow trigger its displacement such that EBF1 binding site will eventually reside at the nucleosome edge. Testing this hypothesis could be performed by assaying in vitro binding of EBF1 to assembled nucleosome arrays. 
+Surprisingly, a large fraction of EBF1 binding sites was found to be occupied by what seemed to be a rotationally positioned nucleosome whose edges were bound by EBF1. Furthermore, it appeared that the EBF1 binding motif resembles a nucleosome positioning sequence and could be involved in the positioning of the nucleosome.  However, at least two alternative scenarios could explain the presence of an EBF1 binding site at the entry of a nucleosome. First, EBF1 genuinely binds to such "pre-positioned" nucleosomes, in which case I am observing the true EBF1 binding mechanism. Alternatively, EBF1 binding - to either nucleosomal or naked DNA - results in the positioning of a nucleosome right beside. In my opinion, the previous results suggesting a pioneer function for EBF1 \citep{boller_pioneering_2016} make the second hypothesis more likely. EBF1 would directly engage a nucleosome and somehow trigger its displacement such that the EBF1 binding site will eventually reside at the nucleosome edge. Testing this hypothesis could be performed by assaying in vitro binding of EBF1 to assembled nucleosome arrays. 
 
 % CTCF
-The study of CTCF binding sites revealed that they can be grouped in i) promoter distal and ii) promoter proximal binding sites. In each of the subset, CTCF was observed to bind with a different group of interactors, suggesting different functions. At promoter distal binding sites CTCF is associated the cohesin complex while at promoter proximal regions, CTCF seems to be associated with ZNF143 and YY1.
+The study of CTCF binding sites revealed that they can be grouped into i) promoter distal and ii) promoter proximal binding sites. In each of the subsets, CTCF was observed to bind with a different group of interactors, suggesting different functions. At promoter distal binding sites, CTCF is associated with the cohesin complex while at promoter proximal regions, CTCF seems to be associated with ZNF143 and YY1.
 
 % interaction
 Finally, the study of the motif co-localization, even if simple, seemed quite powerful as it allowed the identification of 35 interactions with CTCF or JunD. Out of these, 25 have already been proposed but without experimental support, 5 have been proposed and experimentally validated and 5 were new. These 5 new interactions are proposed to be indirect co-binding events and thus imply a physical interaction that can be tested.
 
 \section{Methods}
 
 \subsection{Data and data processing}
 \label{encode_peaks_methods_data}
 
 All the GM12878 ENCODE data used were mapped against hg19 genome and can be found on the MGA repository \citep{dreos_mga_2018}.
 
 Peaks called by the ENCODE Consortium using their uniform processing pipeline \cite{gerstein_architecture_2012} were used. These peaks can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Uniform-TFBS/Uniform-TFBS.html}. Assuming that a TF binds to DNA through motif recognition, the peak center should be localized on the motif center. Thus the center of each peak was moved to the closest motif instance within 60bp. To do so, each TF was associated to a log-odds PWM contained either in the JASPAR Core vertebrate 2014 \cite{mathelier_jaspar_2014}, HOCOMOCO v10 \cite{kulakovskiy_hocomoco:_2016} or Jolma \cite{jolma_dna-binding_2013} collection. Using the corresponding log-odds PWM, peak sequences were scanned to find motif instances with a score higher than or equal to the score corresponding to a p-value of 1e-4. If such a motif instance was found, the peak position was shifted to the center of the motif instance and mapped to the corresponding strand. Otherwise, the peak position remained unchanged without strand information.
 
 In GM12878 cells, nucleosome occupancy was assessed using MNase-seq data released by the ENCODE Consortium (GSE35586). These data can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/GSE35586/GSE35586.html}. To increase sequencing depth, all replicates available for this cell line were pooled together, resulting in ~789 mio reads, and used as a single dataset. The resulting dataset is available and has the description "GM12878|Nucleosome|all (SLOW!)". Because each read was represented as a single point coordinate corresponding to their 5' edges, these coordinates were centered by 70bp in order to indicate the nucleosome dyads. Finally, another dataset was used for one analysis only. These data were released by Gaffney and colleagues \cite{gaffney_controls_2012} and can be found at \url{https://ccg.epfl.ch/mga/hg19/gaffney12/gaffney12.html} and were not centered as the coordinates already represent the center of paired-end sequenced fragments. The dataset is labeled "All Paired-end samples - 147bp fragments".
 
 Chromatin accessibility was assessed using DNaseI-seq data released by the ENCODE Consortium \cite{boyle_high-resolution_2008} (GSE32970). To increase sequencing depth, all replicates available for GM12878 cells were pooled together, resulting in $\sim$144 million reads, and used as a single dataset. The individual replicates can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Duke-DNaseI-HS/Duke-DNaseI-HS.html}. The reads were represented as a single point coordinate corresponding to their 5' edges but were not centered as this corresponds to the exact DNaseI nick location.
 
 The EPDnew release 003 was used as TSS annotation \cite{dreos_eukaryotic_2017} and genome sequence conservation was assessed using Phastcons \cite{siepel_evolutionarily_2005}. Both datasets can be found at \url{https://ccg.epfl.ch/mga/hg19/epd/epd.html} and \url{https://ccg.epfl.ch/mga/hg19/phastcons/phastcons.html} respectively.
 
 \subsection{Classification of MNase patterns}
 \label{encode_peaks_em_mnase}
 
 For each TF peaklist MNase, DNase, sequence conservation and TSS density around TF binding site were assessed independently by counting the number of read mapped from -999bp to +1000bp around each peak, using 10bp bins. For each TF, 4 matrices having one row per binding site (peak) and 199 columns were created using ChIP-extract program \citep{ambrosini_chip-seq_2016}.
 
 Probabilistic pattern classification was achieved using the ChIPPartitioning (see section \ref{encode_peaks_chippartitioning}). The algorithm was implemented as described in the supplemental materials of \cite{nair_probabilistic_2014}. 
 
 Two different procedures were used to classify MNase patterns. Both were run for 10 iterations allowing flip and a value of shift of 15 bins.
 
 The first procedure aimed to discover 4 different pattern classes, allowing flip and a shift of 15 bins. The procedure was initialized with 4 classes. The class patterns were initialized by assigning each peak a random probability to belong to each of the 4 classes. The patterns were then computed as the weighted average of the signal given the peak class probabilities as weights. Then the prior class probabilities were initialized as $p_{k,s,f} = 1/(K \times S \times 2)$ where $k$ is the class index, $s$ is the shift state index, $f$ is an indicator variable for the flip state (1 for "normal", 2 for "reverse"), $K$ is the number of classes (here 4) and $S$ is the number of allowed shift states in bins (here 15). The classification was run for 10 iterations. At the end, it returned a matrix of dimensions $N \times K \times S \times 2$ containing the probabilities for each of the $N$ regions to belong to each of the $K$ classes, for each of the $S$ possible shift states and for both flip states ("normal" or "reverse"). 
 
   The second procedure aimed to discriminate between 2 classes : i) the binding sites describing the "average" binding sites as opposed to ii) those differing from this. To do so, class patterns were initialized to i) the aggregation over all peaks (the average pattern) and ii) a flat pattern being the mean number of counts of the input matrix. Flip and 15 bins of shift were allowed. The prior class probabilities were initialized as $p_{k,s,f} = \mathcal{N}(s,floor(S/2)+1,1)$ where the second and third parameters are the mean and the standard deviation, giving a higher prior probability to states with shift equal to 0bp.
 
 \subsection{Quantifying nucleosome array intensity from classification results}
 Nucleosome array intensity was quantified using a method developed by Zhang and colleagues \citep{zhang_canonical_2014}. Briefly, nucleosome signal is represented in 2 dimensions as a set of signal intensities for a given set of positions. Data are structured as a vector $Y$ containing the nucleosome occupancy signal (for instance an EM classification class profile) for $n$ bins (for EM class profiles, 199 bins of 10bp). First, the 1$^{st}$ order derivative $D_{1}$ of $Y$ is computed. Then the 1$^{st}$ order derivative $D_{2}$ of the absolute value of $D_{1}$ is computed. Local maxima in $D_{2}$ are searched using a window of 15 bins (corresponding to 150bp, a nucleosome width). Maxima can be interpreted as a strong drop or enrichment of signal, corresponding to a pattern expected from a well positioned nucleosome array. Finally, all $D_{2}$ maxima are joined by a line and the nucleosome array intensity at each given position is the height of the line at this position. The nucleosome array intensity for the first and last positions of $Y$ was set to 0. The average nucleosome array intensity of $Y$ was used as the nucleosome array value of the input data.
 
 The classification of a matrix of counts having $N$ rows (regions), with $K$ classes, allowing a maximum of $S$ shift states and two flip states ("normal" and "reverse") outputs a probability matrix $P$ of dimension [$N$, $K$, $S$, 2] containing the probability for each region to belong to each class, given a shift state and a flip state. This matrix can be used to compute a vector $D_{k}$ of length $S$ containing the probability density of the shift states for a class $k$ using :
 
 \begin{equation}
 \begin{aligned}
 	D_{k,s} & = \frac {\sum_{i=1}^{N} (P_{i,k,s,1} + P_{i,k,s',2})} {\sum_{i=1}^{N} \sum_{s=1}^{S} (P_{i,k,s,1} + P_{i,k,s',2})} \\
 	\text{with } \\
 	 s' & = S - s + 1
 \end{aligned}
 \label{encode_peaks_equation_shift_density1}
 \end{equation}
 
 
 \citep{ambrosini_chip-seq_2016}
 where $s'$ represents the index of the reverse orientation and with the constraint that all the elements of $P$ sum to 1. Given the shift probability density vector $D_{k}$ of one class, computing its standard deviation was done using :
 
 \begin{equation}
 \begin{aligned}
 	\sigma_{k} & = \sqrt { \sum_{i=1}^{S} (X_{i}^{2} \cdot D_{k,i}) - \mu_{k}^{2} }\\
 	\text{with } \\
 	\mu_{k} & = \sum_{i=1}^{S} (X_{i} \cdot D_{k,i})
 \end{aligned}
 \label{encode_peaks_equation_shift_density2}
 \end{equation}
 
 where $X$ is a vector containing the position changes in bp for every shift state, e.g. for a maximum number of shift states of 15 ($S=15$) with bins of 10bp, $X$ would contain [-70, -60, ..., 0,  ..., +60, +70].
 
 \subsection{Peak colocalization}
 
 To measure the extent of colocalization between CTCF, YY1, ZNF143, SMC3 and RAD21, the occurrence of YY1, ZNF143, SMC3 and RAD21 peaks around CTCF peaks was computed using ChIP-extract \citep{ambrosini_chip-seq_2016}. The CTCF peak list used as reference was "wgEncodeAwgTfbsSydhGm12878Ctcfsc15914sc20UniPk" because it was the CTCF peak list containing i) the most CTCF peaks and ii) the highest proportion of peaks with a motif. ChIP-extract was run separately for YY1, ZNF143, SMC3 and RAD21 using the following parameters : from -99, to 100, window size 1. Then, the proportion of CTCF peaks having at least one other peak within +/-10bp, 50bp or 100bp was computed.
 
 \subsection{NDR detection}
 
 Let us consider a matrix of MNase-seq counts $R$ of dimensions $N \times L$ containing $N$ vectors of read counts $r_{1}, r_{2}, ..., r_{N}$ of length $L$. Because MNase-seq reads are a direct indication of the nucleosome occupancy, detecting NDRs is about finding low signal regions, flanked by two high signal regions.
 
 The signal in each vector $r_{i}$ (region) is assumed to have been sampled from a 2 class mixture of high (nucleosome) and low (nucleosome-free) signal, using a Poisson distribution. Both classes are expected to occur with a given probability  $p^{nucl}_{i}$ and $p^{free}_{i}$. The rows are considered individually to lessen technical biases such as region specific sequencing depth.
 
 The class probabilities and their mean parameters are estimated using an EM algorithm. First, during the E-step, for each position inside a region, the posterior probability of the nucleosome given the data is computed using :
 
 \begin{equation}
 \begin{aligned}
 	P(nucl | r_{i,l}) = \frac{p_{i}^{nucl} \times Poisson(r_{i,l}, \lambda=m_{i}^{nucl})}
 	                         {p_{i}^{nucl} \times Poisson(r_{i,l}, \lambda=m_{i}^{nucl}) +
 	                          p_{i}^{free} \times Poisson(r_{i,l}, \lambda=m_{i}^{free})}
 \end{aligned}
 \end{equation}
 
 where $r_{i,l}$ is the number of reads at position $l$ in the i-th row of $R$, $m_{i}^{nucl}$ and $m_{i}^{free}$ are the mean parameters of the nucleosome and nucleosome-free classes respectively. Obviously, the nucleosome-free class posterior probability is
 
 \begin{equation}
 \begin{aligned}
 	P(free | r_{i,l}) = 1 - P(nucl | r_{i,l})
 \end{aligned}
 \end{equation}
 
 Then, during the M-step, the class mean parameters are updated using
 
 \begin{equation}
 \begin{aligned}
 	m_{i}^{nucl} = & \frac{\sum_{l=1}^{L} r_{i,l} \times P(nucl | r_{i,l})} {\sum_{l=1}^{L} P(nucl | r_{i,l})} \\
 	m_{i}^{free} = & \frac{\sum_{l=1}^{L} r_{i,l} \times P(free | r_{i,l})} {\sum_{l=1}^{L} P(free | r_{i,l})}
 \end{aligned}
 \end{equation}
 
 and the class probabilities :
 \begin{equation}
 \begin{aligned}
 	p_{i}^{nucl} = & \frac{1} {L} \times \sum_{l=1}^{L} P(nucl | r_{i,l}) \\
 	p_{i}^{free} = & 1 - p_{i}^{nucl}
 \end{aligned}
 \end{equation}
 
 The EM optimization of the parameter estimates was repeated for 10 iterations. At the end of the parameter estimation process, each of the $L$ positions in a region $R_{i}$ were assigned two posterior probabilities $P(nucl | r_{i,l})$ and $P(free | r_{i,l})$ to belong to each class. In all cases, the nucleosome class was the class having the highest mean parameter and the nucleosome free class the class with the smallest ($m_{i}^{nucl} > m_{i}^{free}$).
 
 The binding sites - located in the center of the regions, at position $s = L/2$ - were assumed to be within the NDR. From that point, the NDR was extended using the following procedure :
 
 \SetKwProg{Fn}{}{\{}{}\SetKwFunction{Function}{float NDRextend}%
 \begin{algorithm}[H]
 	\label{encode_peaks_algo_ndr_extend}
 	\Fn{\Function{}}
 	{	\KwData{The posterior probabilities obtained for each position of $r_{i}$.}
 		\KwResult{the left and right coordinates of the NDR}
 		
 		\tcp{NDR only covers the central location}		
 		$left  = s$ \;
 		$right = s$ \;
 		
 		\While{$left \ne 2$ and $right \ne L-1$}
 		{	$p.free.l = P(free|r_{i,left})$ \;
 			$p.free.r = P(free|r_{i,right})$ \;
 			$p.nucl.l = P(nucl|r_{i,left})$ \;
 			$p.nucl.r = P(nucl|r_{i,right})$ \;
 
 			\tcp{bidirectional extension}
 			\If{$p.free.l > p.nucl.l$ and $p.free.r > p.nucl.r$}
 			{	$left \minuseq 1$ \; 
 				$right \pluseq 1$ \;
 			}
 			
 			\tcp{extension to left}
 			\ElseIf{$p.free.l > p.nucl.l$}
 			{	$left \minuseq 1$ \;  }
 			
 			\tcp{extension to right}			
 			\ElseIf{$p.free.r > p.nucl.r$}
 			{	$right \pluseq 1$ \; }
 			
 			\tcp{no more extension possible}
 			\Else
 			{	break \; }
 		}
 		
 		\Return{$left$, $right$}
 	}
 	\caption{Searches the coordinates of the NDR using the posterior nucleosome and nucleosome free class probabilities, for a region $R_i$, from its central position.}
 \end{algorithm}
 
 The nucleosome occupancy around CTCF binding sites was measured using ChIP-extract with "wgEncodeAwgTfbsSydhGm12878Ctcfsc15914sc20UniPk" peak list as reference - because it was the CTCF peak list with the most peaks and with the highest proportion of peaks with a CTCF motif -, the ENCODE MNase-seq data described in section \ref{encode_peaks_methods_data} as targets and the following parameters : from -999bp, to 1000bp and window size 10bp.
 
 This matrix was subjected to a ChIPPartitioning partitioning, as described in section \ref{encode_peaks_em_mnase}, to find 4 nucleosome architectures, using shifting and flipping. The resulting posterior probabilities were used to re-orient the data. If the major shift state - that is the shift state with the highest overall probability - for a given region was the "reverse" state, then the row was reversed. The re-oriented matrix was then subjected to the NDR detection. The re-orientation was done for aesthetic purposes only. Because the NDR detection was performed starting from the center position in each region - and given that reverting a vector did not change its central position - this operation had no influence on the NDR detection.
 
 
 
 \subsection{CTCF and JunD interactors}
 
 % Enumerating motif instances genome-wide
 To enumerate instances of the CTCF and JunD motifs, the hg19 genome assembly was scanned using the CTCF (MA0139.1 from JASPAR Core Vertebrate 2014 \citep{mathelier_jaspar_2014}) and JunD (JUND\_HUMAN.H10MO.A from HOCOMOCOv10 \citep{kulakovskiy_hocomoco:_2016}) matrices to produce lists of potential binding sites. The score threshold was set to the score corresponding to a p-value of 1e-5 for each matrix, respectively. This was done using the matrix\_scan program from PWMScan \citep{ambrosini_pwmscan:_2018}. Finally, any motif instance falling inside a region classified as being a repeated element and blacklisted by the ENCODE Consortium was filtered out using the count\_filter program from the ChIP-seq tools \citep{ambrosini_chip-seq_2016-1}.
 
 % Measuring motif instance occurence near peaks
 Then, for each TF peak list independently, the number of i) the TF and ii) CTCF/JunD motif instances within +/- 1kb of each peak was measured, in bins of 1bp, using the ChIP-extract program from the ChIP-seq tools \citep{ambrosini_chip-seq_2016-1}. The associations were measured as follows : using the ChIP-extract results for the given peak list versus i) the TF and ii) the CTCF/JunD motif instances, the peaks were counted according to whether they had i) at least one TF and one CTCF/JunD motif instance, ii) only TF motif instances, iii) only CTCF/JunD motif instances or iv) no motif instance. These numbers were used to build a contingency table and a two-sided Fisher exact test for association was performed. The motif relationship was considered a significant association if the OR of the test was bigger than 1 and the 95\% CI of the OR did not contain 1, or a significant exclusion if the OR was smaller than 1 and the 95\% CI of the OR did not contain 1.
 
 % Motif density around peaks
 The motif occurrence densities were computed from the ChIP-extract result matrices. Out of each matrix, a vector containing the number of motif instances at each possible absolute distance was computed. This was done as follows : first, the neighbours of each non-null cell were incremented (+/- 5 columns on each side) to turn motif instance hits into a non point-like representation. A given cell value could be incremented several times. Second, for each row, the columns corresponding to the same absolute distance from the peak were summed together (e.g. +1bp with -1bp, +2bp with -2bp, +999bp with -999bp). The first column of the resulting matrix thus contains the number of motif instances present at the peak center (distance of 0bp), the second column the number at an absolute distance of 1bp, and so on. Finally, the rows were summed up and the resulting vector was considered as the motif density vector for the given peak list. The vectors were used to create one matrix for the CTCF motif and one for the JunD motif (a vector corresponds to a row), and each matrix was displayed as a heatmap. The row values were standardized and the rows hierarchically clustered using the euclidean distance.
 
 
 \subsection{EBF1 and nucleosome}
 
 The correlation between EBF1 binding sites and nucleosome dyads was made using ChIP-cor \citep{ambrosini_chip-seq_2016-1}, from the web (\url{https://ccg.epfl.ch/chipseq/chip_cor.php}). The references were the corrected EBF1 peaks (wgEncodeAwgTfbsHaibGm12878Ebf1sc137065Pcr1xUniPk dataset, for more details see section \ref{encode_peaks_methods_data}) and the targets either i) the MNase-seq data released by Gaffney et al. \citep{gaffney_controls_2012} (hg19 / DNase FAIRE etc / Gaffney 2012 ... / All Paired-end samples - 147bp fragments) or ii) the ENCODE MNase-seq data (hg19 / ENCODE DNase FAIRE etc / GSE35586 ... / GM12878 Nucleosome all (SLOW!)). In both cases, "any" strand was selected. Because Gaffney data are paired-ended and represent the fragment midpoint (the dyad), no centering was done. The ENCODE data are single-ended and a centering of 70bp (half a nucleosome) was applied to approximate the fragment midpoint. The count cut-off was set to 1 and the range to -399 to +400bp.
 
 To isolate nucleosomes with an EBF1 binding site, the opposite ChIP-cor analysis was run : Gaffney data as references versus EBF1 binding sites as targets with count cut-off set to 1 and the range to -399 to +400bp. In the results page the "Feature Selection Tool" was used to select dyads with at least 1 EBF1 binding site (threshold parameter) located "From" -99bp "To" 100bp. The count cut-off was set to 9999 and both "Switch to depleted feature" and "Reference feature oriented" set to "Off".
 
 These nucleosome dyads were uploaded to OProf (\url{https://ccg.epfl.ch/ssa/oprof.php}) on the SSA server \citep{ambrosini_signal_2003}. Four individual analyses were run to measure the "WW", "SS", "YRRRRRYYYYYR" and EBF1 motif occurrences. In all cases, the 5' and 3' borders were set to -399bp and 400bp, the window shift to 1bp and the search mode to "bidirectional". For "SS" and "WW", the motif to search was entered as a "Consensus sequence", the window size was set to 2bp, the reference position to 1 and the number of allowed mismatches to 0. For "YRRRRRYYYYYR", the motif was also entered as a "Consensus sequence", the window size was set to 12bp, the reference position to 6 and the number of allowed mismatches to 4. For the EBF1 motif, the JASPAR CORE Vertebrate 2018 "EBF1 MA0154.3 (length=14)" was used with a window size of 14bp, a reference position of 7 and a p-value threshold of 1e-4.
 
 To investigate the chromatin architecture around nucleosome dyads, ChIP-cor was used. Two references were used : i) the nucleosomes with an EBF1 binding site (see above) and ii) the entire Gaffney dataset (hg19 / DNase FAIRE etc / Gaffney 2012 ... / All Paired-end samples - 147bp fragments). For each reference, three analyses were run against different target features : i) DNase-seq data to monitor chromatin accessibility (hg19 / ENCODE DNase FAIRE etc / Boyle 2008 ... DNaseI HS - GM12878 - Rep 1) with "any" strand and no centering, ii) H3K4me2 ChIP-seq data (hg19 / ENCODE ChIP-seq / GSE29611 ... / GM12878 H3k4me2) with "any" strand and a centering of 70bp (half the nucleosome) and iii) positional sequence conservation scores (hg19 / Sequence derived / Vertebrate Conservation (phastCons46way) ... / PHASTCONS VERT46) with "any" strand and no centering. For DNase-seq and sequence conservation, the range was set to -399bp to 400bp with a window width of 1bp. For H3K4me2 data, the range was set to -3999bp to 4000bp with a window width of 10bp. For the DNase-seq and the H3K4me2 data, the count cut-offs were set to 1, for the sequence conservation to 9999.
diff --git a/main/ch_spark.aux b/main/ch_spark.aux
index 4ff6fd4..7f6fcda 100644
--- a/main/ch_spark.aux
+++ b/main/ch_spark.aux
@@ -1,98 +1,102 @@
 \relax 
 \providecommand\hyper@newdestlabel[2]{}
 \citation{groux_spar-k:_2019}
 \citation{hon_chromasig:_2008}
 \citation{lai_archalign:_2010}
-\citation{nielsen_catchprofiles}
+\citation{nielsen_catchprofiles:_2012}
 \citation{kundaje_ubiquitous_2012}
 \citation{nair_probabilistic_2014}
 \@writefile{toc}{\contentsline {chapter}{\numberline {4}SPar-K}{59}{chapter.4}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
 \@writefile{loa}{\addvspace {10\p@ }}
 \newlabel{spark}{{4}{59}{SPar-K}{chapter.4}{}}
 \@writefile{chapter}{\contentsline {toc}{SPar-K}{59}{chapter.4}}
 \@writefile{toc}{\contentsline {section}{\numberline {4.1}Algorithm}{59}{section.4.1}}
 \citation{arthur_k-means++:_2007}
 \citation{groux_spar-k:_2019}
 \citation{groux_spar-k:_2019}
 \citation{groux_spar-k:_2019}
 \citation{groux_spar-k:_2019}
 \citation{groux_spar-k:_2019}
 \citation{groux_spar-k:_2019}
+\citation{groux_spar-k:_2019}
+\citation{groux_spar-k:_2019}
 \@writefile{toc}{\contentsline {section}{\numberline {4.2}Implementation}{60}{section.4.2}}
-\citation{leisch_toolbox_2006}
-\citation{nair_probabilistic_2014}
-\citation{nair_probabilistic_2014}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Synthethic datasets : \textbf  {A} The class signal densities. \textbf  {B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf  {C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf  {D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf  {E} one of the corresponding SPar-K partition, with shifting and flipping.\relax }}{61}{figure.caption.23}}
-\newlabel{spark_simulated_data}{{4.1}{61}{Synthethic datasets : \textbf {A} The class signal densities. \textbf {B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf {C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf {D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf {E} one of the corresponding SPar-K partition, with shifting and flipping.\relax }{figure.caption.23}{}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.3}Benchmarking}{61}{section.4.3}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.3.1}K-means}{61}{subsection.4.3.1}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Synthethic datasets : \textbf  {A} The class signal densities. \textbf  {B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf  {C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf  {D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf  {E} one of the corresponding SPar-K partition, with shifting and flipping. Figure and legend taken and adapted from \citep  {groux_spar-k:_2019}.\relax }}{61}{figure.caption.23}}
+\newlabel{spark_simulated_data}{{4.1}{61}{Synthethic datasets : \textbf {A} The class signal densities. \textbf {B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf {C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf {D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf {E} one of the corresponding SPar-K partition, with shifting and flipping. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.23}{}}
 \@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces \textbf  {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. Figure and legend taken and adapted from \citep  {groux_spar-k:_2019}.\relax }}{62}{figure.caption.24}}
 \newlabel{spark_ari}{{4.2}{62}{\textbf {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.24}{}}
 \@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces \textbf  {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf  {A} Seeding done at random, \textbf  {B} seeding done at random and outlier smoothing \textbf  {C} seeding done with the K-means++ method \textbf  {D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep  {groux_spar-k:_2019}.\relax }}{63}{figure.caption.25}}
 \newlabel{spark_sse}{{4.3}{63}{\textbf {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf {A} Seeding done at random, \textbf {B} seeding done at random and outlier smoothing \textbf {C} seeding done with the K-means++ method \textbf {D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.25}{}}
 \@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces \textbf  {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. Figure and legend taken and adapted from \citep  {groux_spar-k:_2019}.\relax }}{63}{figure.caption.26}}
 \newlabel{spark_time}{{4.4}{63}{\textbf {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.26}{}}
-\citation{groux_spar-k:_2019}
+\citation{leisch_toolbox_2006}
+\citation{nair_probabilistic_2014}
+\citation{nair_probabilistic_2014}
+\@writefile{toc}{\contentsline {section}{\numberline {4.3}Benchmarking}{64}{section.4.3}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.3.1}K-means}{64}{subsection.4.3.1}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {4.3.2}ChIPPartitioning}{64}{subsection.4.3.2}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {4.3.3}Data}{64}{subsection.4.3.3}}
-\citation{ambrosini_chip-seq_2016}
+\citation{groux_spar-k:_2019}
+\citation{groux_spar-k:_2019}
+\citation{groux_spar-k:_2019}
 \citation{ambrosini_chip-seq_2016}
 \citation{groux_spar-k:_2019}
+\citation{ambrosini_chip-seq_2016}
 \citation{groux_spar-k:_2019}
 \citation{bailey_meme_2009}
-\citation{kundaje_ubiquitous_2012}
 \@writefile{toc}{\contentsline {subsection}{\numberline {4.3.4}Performances}{65}{subsection.4.3.4}}
 \@writefile{toc}{\contentsline {section}{\numberline {4.4}Partition of DNase and MNase data}{65}{section.4.4}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.5}Conclusions}{65}{section.4.5}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf  {A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf  {B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf  {C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf  {D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf  {E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf  {F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep  {ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf  {G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. 
\textbf  {H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster.\relax }}{66}{figure.caption.27}}
-\newlabel{spark_ctcf}{{4.5}{66}{Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf {A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf {B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf {C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf {D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf {E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf {F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep {ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf {G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. \textbf {H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster.\relax }{figure.caption.27}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf  {A.} Input data based on peak summits provided by ENCODE. \textbf  {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf  {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf  {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf  {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf  {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf  {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf  {B}. Figure and legend taken and adapted from \citep  {groux_spar-k:_2019}.\relax }}{67}{figure.caption.28}}
-\newlabel{spark_dnase}{{4.6}{67}{Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.28}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf  {A.} Input data based on peak summits provided by ENCODE. \textbf  {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf  {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf  {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf  {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf  {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf  {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf  {B}. Figure and legend taken and adapted from \citep  {groux_spar-k:_2019}.\relax }}{66}{figure.caption.27}}
+\newlabel{spark_dnase}{{4.5}{66}{Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.27}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf  {A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf  {B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf  {C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf  {D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf  {E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf  {F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep  {ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf  {G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. \textbf  {H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster. 
Figure and legend taken and adapted from \citep  {groux_spar-k:_2019}.\relax }}{67}{figure.caption.28}}
+\newlabel{spark_ctcf}{{4.6}{67}{Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf {A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf {B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf {C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf {D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf {E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf {F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep {ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf {G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. \textbf {H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster. 
Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.28}{}}
+\citation{kundaje_ubiquitous_2012}
+\@writefile{toc}{\contentsline {section}{\numberline {4.5}Conclusions}{68}{section.4.5}}
 \@setckpt{main/ch_spark}{
-\setcounter{page}{68}
+\setcounter{page}{69}
 \setcounter{equation}{1}
 \setcounter{enumi}{8}
 \setcounter{enumii}{0}
 \setcounter{enumiii}{0}
 \setcounter{enumiv}{0}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{0}
 \setcounter{chapter}{4}
 \setcounter{section}{5}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{paragraph}{0}
 \setcounter{subparagraph}{0}
 \setcounter{figure}{6}
 \setcounter{table}{0}
 \setcounter{NAT@ctr}{0}
 \setcounter{FBcaption@count}{0}
 \setcounter{ContinuedFloat}{0}
 \setcounter{KVtest}{0}
 \setcounter{subfigure}{0}
 \setcounter{subfigure@save}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
 \setcounter{subtable@save}{0}
 \setcounter{lotdepth}{1}
 \setcounter{lips@count}{0}
 \setcounter{lstnumber}{1}
 \setcounter{Item}{8}
 \setcounter{Hfootnote}{0}
 \setcounter{bookmark@seq@number}{0}
 \setcounter{AM@survey}{0}
 \setcounter{ttlp@side}{0}
 \setcounter{myparts}{0}
 \setcounter{parentequation}{0}
 \setcounter{AlgoLine}{28}
 \setcounter{algocfline}{1}
 \setcounter{algocfproc}{1}
 \setcounter{algocf}{1}
 \setcounter{float@type}{8}
 \setcounter{nlinenum}{0}
 \setcounter{lstlisting}{0}
 \setcounter{section@level}{0}
 }
diff --git a/main/ch_spark.tex b/main/ch_spark.tex
index 23fa6b1..eb640ea 100644
--- a/main/ch_spark.tex
+++ b/main/ch_spark.tex
@@ -1,119 +1,121 @@
 \cleardoublepage
 \chapter{SPar-K}
 \label{spark}
 \markboth{SPar-K}{SPar-K}
 \addcontentsline{chapter}{toc}{SPar-K}
 
-This chapter describes SPar-K (Signal Partitioning with K-means), a modification of the K-means algorithm to cluster genomic regions based on their chromatin organization, defined by by their sequencing profiles. 
+This chapter describes SPar-K (Signal Partitioning with K-means), a modification of the K-means algorithm to cluster genomic regions based on their chromatin organization, defined by their sequencing profiles.
 
 I developed, implemented and benchmark this algorithm and produced all the figures that are shown in this chapter. The content of this section is taken an adapted from the original article \citep{groux_spar-k:_2019}.
 
 % Due to the wealth of sequencing data, it is common to analyze positional correlations between chromatin features, e.g. the position of nucleosomes (revealed by MNase-seq) relative to transcription factor binding regions (mapped by ChIP-seq) in order to shed light on their functional relationship. However, as noted in \citep{kundaje_ubiquitous_2012}, chromatin patterns tend to be heterogeneous and often asymmetric. Furthermore, limited mapping precision leading to moderate misalignment between functionally equivalent regions can also obscure a chromatin pattern.
 
-Due to the wealth of sequencing data, it is common to analyze positional correlations between chromatin features, e.g. the position of nucleosomes (revealed by MNase-seq) relative to transcription factor binding regions (mapped by ChIP-seq) in order to shed light on their functional relationship. Several methods and software have been developed for discovering chromatin pattern by clustering and/or realignment of signal profiles for genomic regions (see section \ref{intro_pattern_discovery}), including ChromaSig \citep{hon_chromasig:_2008}, ArchAlign \citep{lai_archalign:_2010}, CATCHProfiles \citep{nielsen_catchprofiles}, CAGT \citep{kundaje_ubiquitous_2012} and ChIPPartitioning\citep{nair_probabilistic_2014}. However, all of these programs have some limitations. Some do not realign, others are restricted to count data or lack an runtime efficient implementation, such as ChIPParititioning. To fill this gap, I developed SPar-K (Signal Partitioning with K-means).
+Due to the wealth of sequencing data, it is common to analyze positional correlations between chromatin features, e.g. the position of nucleosomes (revealed by MNase-seq) relative to transcription factor binding regions (mapped by ChIP-seq) in order to shed light on their functional relationship. Several methods and software have been developed for discovering chromatin patterns by clustering and/or realigning read density profiles over genomic regions (see section \ref{intro_pattern_discovery}), including ChromaSig \citep{hon_chromasig:_2008}, ArchAlign \citep{lai_archalign:_2010}, CATCHProfiles \citep{nielsen_catchprofiles:_2012}, CAGT \citep{kundaje_ubiquitous_2012} and ChIPPartitioning \citep{nair_probabilistic_2014}. However, these programs have some limitations. Some do not perform a realignment, others are restricted to count data or lack a runtime-efficient implementation, such as ChIPPartitioning. To fill this gap, I developed SPar-K (Signal Partitioning with K-means).
 
 \section{Algorithm}
 
 % summary
-SPar-K algorithm (Algorithm \ref{algo_spark}) is a modified version of the regular K-means algorithm during which a set of $N$ regions of size $L$ are partitioned into $K$ clusters, using an iterative optimization procedure. Each cluster is composed of an alignment of regions sub-parts of length $W$ assigned to this cluster and the cluster is summarized a vector of length $L \geq W$ that contains the average signal at each position in the alignment.
+The SPar-K algorithm (see Algorithm \ref{algo_spark}) is a modified version of the regular K-means algorithm in which a set of $N$ regions of size $L$ are partitioned into $K$ clusters, using an iterative optimization procedure. Each cluster is composed of an alignment of region sub-parts of length $L'$ assigned to this cluster and the cluster is summarized by a vector of length $L \geq L'$ that contains the average signal at each position in the alignment.
 
 % input
-The input data are stored as a $N$ rows and $L$ columns matrix $R$. The signal resolution may be at single-base or at a larger bin size. The regions are typically defined by relative positions to an anchor point, e.g. a ChIP-seq peak summit. If the signal is noisy, a data smoothing step can be undertaken to average out outlier values (Algorithm \ref{algo_smooth_outliers}) and ease the partitioning procedure.
+The input data are stored as a $N$ rows and $L$ columns matrix $R$. The signal resolution may be at single-base or at a larger bin size. The regions are typically defined by relative positions to an anchor point, e.g. a ChIP-seq peak summit. If the signal is noisy, a data smoothing step can be performed to average out outlier values (see Algorithm \ref{algo_smooth_outliers}) and ease the partitioning procedure.
 
 % objective function
-SPar-K optimizes the alignments by minimizing the sum of squares errors. That is, the sum of the squares distances of each point to the cluster aggregation they are assigned to.
+SPar-K optimizes the alignments by minimizing the sum of squared errors, that is, the sum of the squared distances of each point to the cluster aggregation it is assigned to.
 
 % distances
-The distance between any two regions is computed a modified correlation distance. Let us assume two regions $X$ and $Y$ of length $L$ and a shifting freedom $S$. $X$ and $Y$ will be sub-divided in $S$ slices each. Each slice has a length of $W$=$L-S-1$ and starts at all possible offsets $s=1,2,...,S$. All $S^{2}$ pairwise comparisons between any slices of $X$ and $Y$ are computed using $1-cor(X_{i},Y_{j})$ where $X_{i}$ and $Y_{j}$ are the slices starting at offsets $i,j \in s$. If flipping is allowed, another set of $S^{2}$ comparisons is performed by flipping $Y_{j}$ (that is, the 1st position in $Y_{j}$ becomes the last and vice-versa), resulting in $2 \times S^{2}$ comparisons. Eventually, the distance between $X$ and $Y$ is the minimum of the $S^{2}$ (without flipping) or  $2 \times S^{2}$ (with flipping) values. For each distance, the indices $i$ and $j$ and whether $Y_{j}$ was flipped in the best comparison are remembered as they allow to rebuilt the optimal alignment between $X$ and $Y$. The naive algorithm to do this is $\Theta(S^{2} \times W)$ in time however I could design a faster algorithm which is $\Theta(S \times W)$ by using a dynamic programming approach (see algorithm \ref{algo_distance_fast}).
+The distance between any two regions is computed using a modified correlation distance. Let us assume two regions $X$ and $Y$ of length $L$ and a shifting freedom $S$. $X$ and $Y$ will be sub-divided in $S$ slices each. Each slice has a length of $L'$=$L-S+1$ and the slices start at all possible offsets $s=1,2,...,S$. All $S^{2}$ pairwise comparisons between any slices of $X$ and $Y$ are computed using $1-cor(X_{i},Y_{j})$ where $X_{i}$ and $Y_{j}$ are the slices starting at offsets $i,j \in s$. If flipping is allowed, another set of $S^{2}$ comparisons is performed by flipping $Y_{j}$ (that is, the 1st position in $Y_{j}$ becomes the last and vice-versa), resulting in $2 \times S^{2}$ comparisons. Finally, the distance between $X$ and $Y$ is the minimum of the $S^{2}$ (without flipping) or  $2 \times S^{2}$ (with flipping) values. For each distance, the indices $i$ and $j$ and whether $Y_{j}$ was flipped in the best comparison are remembered as they allow the optimal alignment between $X$ and $Y$ to be rebuilt. The naive algorithm to do this is $\Theta(S^{2} \times L')$ in time; however, I designed a faster algorithm which is $\Theta(S \times L')$ by using a dynamic programming approach (see Algorithm \ref{algo_distance_fast}).
 
 % iteration walk-through
-SPar-K is initialized by choosing $K$ regions to become the initial cluster aggregations of length $L$ either i) randomly (Algorithm \ref{algo_seed_random}) or ii) using the K-means++ sampling procedure (Algorithm \ref{algo_seed_kmeans++}, \cite{arthur_k-means++:_2007}). Then, each regions is aligned against each cluster aggregation an assigned to the cluster to which it has the smallest distance with. Once all $N$ regions have been aligned to a cluster, the cluster aggregations are updated by computed the average signal at each position in the alignments.
+SPar-K is initialized by choosing $K$ regions to become the initial cluster aggregations of length $L$ either i) randomly (Algorithm \ref{algo_seed_random}) or ii) using the K-means++ \citep{arthur_k-means++:_2007} sampling procedure (see Algorithm \ref{algo_seed_kmeans++}). Then, each region is aligned against each cluster aggregation and assigned to the cluster to which it has the smallest distance. Once all $N$ regions have been aligned to a cluster, the cluster aggregations are updated by computing the average signal at each position in the alignments.
 
-This procedure and is then repeated until i) reaching the maximum number of iterations or ii) achieving convergence.
+This procedure is repeated until i) reaching the maximum number of iterations or ii) achieving convergence, that is, when the alignments in each cluster do not change from one iteration to the next.
 
 
 \section{Implementation}
 
 SPar-K algorithm has been implemented as a stand-alone, fully multithreaded, C++ program. Regarding the parallellization, the computations at each step are independent of each other, leading to an ”embarrassingly parallel” situation. Thus, at each step, the computations are split into equal amounts and distributed over a pool of worker threads. Eventually, the program returns a table listing for each region the cluster assignment, the shift state and the orientation. The software distribution also includes R scripts for visualizing the data as heatmaps as shown in Figure \ref{spark_dnase}. The software source code is available from Github \url{https://github.com/romaingroux/SPar-K} and as Docker container \url{https://hub.docker.com/r/rgroux/spar-k}.
 
 \section{Benchmarking}
 
 \begin{figure}
-	\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure1.pdf}}
-	\caption{Synthethic datasets : \textbf{A} The class signal densities. \textbf{B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf{C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf{D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf{E} one of the corresponding SPar-K partition, with shifting and flipping.}.
+	\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure1.png}}
+	\caption{Synthetic datasets : \textbf{A} The class signal densities. \textbf{B} A synthetic dataset with a mean coverage of 100 reads per region ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf{C} one of the corresponding SPar-K partitions, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf{D} A synthetic dataset with a mean coverage of 100 reads per region ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf{E} one of the corresponding SPar-K partitions, with shifting and flipping.
+Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.}
 	\label{spark_simulated_data}
 \end{figure}
 
 % supplemental figure 2 from article
 \begin{figure}
-	\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure2.pdf}}
+	\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure2.png}}
 \caption{\textbf{Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times.
 Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.}
 \label{spark_ari}
 \end{figure}
 
 % supplemental figure 4 from article
 \begin{figure}
-\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure4.pdf}}
+\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure4.png}}
 \caption{\textbf{Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf{A} Seeding done at random, \textbf{B} seeding done at random and outlier smoothing \textbf{C} seeding done with the K-means++ method \textbf{D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value).
 Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.}
 \label{spark_sse}
 \end{figure}
 
 % supplemental figure 5 from article
 \begin{figure}
-\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure5.pdf}}
+\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure5.png}}
 \caption{\textbf{Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances.
 Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.}
 \label{spark_time}
 \end{figure}
 
-First I compared SPar-K, regular K-means and ChIP-partitioning on synthetic datasets exhibiting properties that are plausible for ChIP-seq profiles for genomic regions. 
+First I compared SPar-K, regular K-means and ChIPPartitioning on synthetic datasets exhibiting properties that are plausible for ChIP-seq profiles for genomic regions. 
 
 \subsection{K-means}
 For the regular K-means, the "kccaFamily" function from the "flexclust" R package \citep{leisch_toolbox_2006} was used. Calls to kccaFamily(dist=distEuclidean, cent=centMean) or kccaFamily(dist=distCor, cent=centMean) were employed to partition the data using the euclidean distance and the correlation distance respectively. "distEuclidean" is a package defined function and "distCor", a custom function computing $1 - cor(x,y)$ for any two $x$ and $y$ vectors. If the correlation between $x$ and $y$ could not be computed (for instance the standard deviation of $x$ or $y$ is equal to 0), the correlation was assumed to be 0 (and the distance 1). The initial centers were chosen using one of the two following seeding strategies : i) a random sampling of $K$ points or ii) K-means++, a strategy aiming at sampling $K$ initial points as far as possible from each other.
 
 \subsection{ChIPPartitioning}
 The implementation was done in R programming language. The "em\_shape", "em\_shape\_shift" and "em\_shape\_shift\_flip" functions present in the supplemental material of \citep{nair_probabilistic_2014} were taken as such and incorporated in a R wrapper (as in Chapter \ref{encode_peaks}). For this method, the partitioning could only be initialized using a random procedure, as described in \citep{nair_probabilistic_2014}.
 
 \subsection{Data}
 I generated several synthetic datasets. Each dataset contained 1000 regions of 2001bp (+/- 1kb around a central position), equally distributed over 3 classes. The signal over a region was modeled as a mixture of class specific signal and of background signal. The class specific signal was modeled by a 1902 element density vector. The background signal was modeled using a second 1902 element density vector containing a uniform density. The first class density vector contained a Gaussian density with mean 951 and standard deviation 40 (Figure \ref{spark_simulated_data}A upper panel). The second class density was a Gaussian density of mean 950 and standard deviation 40. To create an asymmetric signal class, the values at positions 950 to 1902 (comprised) were set to the minimal value found in the original density (Figure \ref{spark_simulated_data}A middle panel). The last class contained a rectangular function with a step corresponding to the elements 830 to 1070 (Figure \ref{spark_simulated_data}A lower panel). Finally, all the densities were normalized such that the sum of each vector was 1. From these densities, the $\lambda$ values for a class $k$ were computed using the following formula :
 
 \begin{equation}
 lambdas_{k} = signal_{k} * c * p_{s} + background * c * p_{b}
 \end{equation}
 
 where $signal_{k}$ is the class characteristic signal density, $background$ a uniform density, $c$ the coverage factor, $p_{s}$ the overall signal proportion and $p_{b}$ the overall background proportion, with the constraint $p_{s} + p_{b} = 1$.
 
 % generations
 For each region, a read signal of 1902bp long was randomly sampled from Poisson distributions with the $lambdas$ values as function parameters. Then, the signal vector was introduced, in a 2001 element long vector filled of 0's, at a given offset, in a given orientation. The offset was randomly sampled from 1 to 100 . The orientation was randomly sampled with a probability of 0.3 to be in the reversed orientation. Finally, the resulting 2001bp vectors were binned using a 10bp window, that is, the signal was summed up every 10 columns leading to the creation of 201 bin long vectors. At the end of the process, a dataset was stored as a matrix of 1000 rows and 201 columns. Two examples of synthetic datasets are shown in Figure \ref{spark_simulated_data}B and D.\\
 
 \subsection{Performances}
-Performance was assessed by the Adjusted Rand index (see Figure \ref{spark_ari} and Supplemental Figure 3 in \citep{groux_spar-k:_2019}) and the optimal number of classes was estimated by the elbow method (Figure \ref{spark_sse}). As expected, regular K-means performed poorly. On the contrary, SPar-K was equally accurate as ChIP-Partitioning except for the lowest coverage class. Considering speed, Spar-K outperformed ChIP-partitioning by a factor of at least 20 (Figure \ref{spark_time}).
+Performance was assessed using the Adjusted Rand Index (see Figure \ref{spark_ari} and Supplemental Figure 3 in \citep{groux_spar-k:_2019}) and the optimal number of classes was estimated using the elbow method (Figure \ref{spark_sse}). As expected, regular K-means performed poorly. In contrast, SPar-K was as accurate as ChIPPartitioning except for the lowest coverage class. Considering speed, SPar-K outperformed ChIPPartitioning by a factor of at least 20 (Figure \ref{spark_time}).
 
 \section{Partition of DNase and MNase data}
 
-% supplemental figure 8 from article
-\begin{figure}
-	\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure8.pdf}}
-	\caption{Nucleosome occupancy, determined by MNase-seq, in bins of 10bp,  +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf{A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf{B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf{C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf{D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf{E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf{F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep{ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf{G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. \textbf{H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster.}
-	\label{spark_ctcf}
-\end{figure}
-
 % figure 1 from article
 \begin{figure}
-\centerline{\includegraphics[scale=0.4]{images/ch_spark/figure1.pdf}}
+\centerline{\includegraphics[scale=0.4]{images/ch_spark/figure1.png}}
 \caption{Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf{A.} Input data based on peak summits provided by ENCODE. \textbf{B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively.  \textbf{C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf{D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf{E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf{F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf{G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf{B}.
 Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.}
 \label{spark_dnase}
 \end{figure}
 
-I applied SPar-K with $K=3$ to DNaseI accessibility profiles (2bp resolution) around  7'206 ChIP-seq SP1-binding peaks  (+/-300bp relative to peak summit) in K562 cells (Figure \ref{spark_dnase}A). The results revealed the presence of clear footprints in all the clusters (Figure \ref{spark_dnase}B). To validate these footprints, I checked whether they are consistent with to location of nucleosomes (Figure \ref{spark_dnase}C) and SP1 binding motifs (Figure \ref{spark_dnase}D), which was indeed the case. De novo motif analysis of the narrow footprints seen in Figure \ref{spark_dnase}B with MEME-ChIP and Tomtom \citep{bailey_meme_2009} discovered SP1-related, NFYA/B and GATA motifs (Figure \ref{spark_dnase}G) the latter two reportedly being interaction partners of SP1. Taken together, these results suggest that SPar-K is able to precisely refocus initially misaligned DNaseI profiles around SP1 binding sites.
+% supplemental figure 8 from article
+\begin{figure}
+	\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure8.png}}
+	\caption{Nucleosome occupancy, determined by MNase-seq, in bins of 10bp,  +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf{A} MNase-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according to their resemblance (correlation) to the overall aggregation pattern. \textbf{B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip information returned by SPar-K and the regions have been ordered according to their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are no longer aligned at position 0. \textbf{C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf{D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf{E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf{F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified according to the shift and flip values returned by SPar-K and the read densities of the different data types were measured using ChIP-Cor \citep{ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf{G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. \textbf{H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster.
+Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.}
+	\label{spark_ctcf}
+\end{figure}
+
+I applied SPar-K with $K=3$ to DNaseI accessibility profiles (2bp resolution) around  7'206 ChIP-seq SP1-binding peaks  (+/-300bp relative to peak summit) in K562 cells (Figure \ref{spark_dnase}A). The results revealed the presence of clear footprints in all the clusters (Figure \ref{spark_dnase}B). To validate these footprints, I checked whether they were consistent with the location of nucleosomes (Figure \ref{spark_dnase}C) and SP1 binding motifs (Figure \ref{spark_dnase}D), which was indeed the case. A de novo motif analysis of the narrow footprints seen in Figure \ref{spark_dnase}B with MEME-ChIP and Tomtom \citep{bailey_meme_2009} revealed SP1-related, NFYA/B and GATA motifs (Figure \ref{spark_dnase}G), the latter two reportedly being interaction partners of SP1. Taken together, these results suggest that SPar-K is able to precisely refocus initially misaligned DNaseI profiles around SP1 binding sites.
 
-The partitioning of SP1 binding regions reveals distinct chromatin landscapes. Cluster 1 (red) groups binding sites lying between two closely spaced nucleosomes. Cluster 2 (blue) shows strong asymmetry suggestive of promoter regions, an interpretation supported by the presence of promoter-associated transcription starts sites (TSSs) and CAGE tags (Figure \ref{spark_dnase}E and F). Finally, the symmetrical cluster 3 (green) contains binding sites located on large nucleosome-free regions reminiscent of enhancer regions.
+The partitioning of SP1 binding regions revealed distinct chromatin landscapes. Cluster 1 (red) grouped binding sites lying between two closely spaced nucleosomes. Cluster 2 (blue) showed a strong asymmetry suggestive of promoter regions, an interpretation supported by the presence of TSSs indicative of promoters and of CAGE tags (Figure \ref{spark_dnase}E and F). Finally, the symmetrical cluster 3 (green) contained binding sites located on large nucleosome-free regions reminiscent of enhancer regions.
 
 As a second example, I ran the same type of analysis on nucleosome profiles around CTCF binding sites (Figure \ref{spark_ctcf}). Overall, the results confirm observations from Chapter \ref{encode_peaks} and published in \citep{kundaje_ubiquitous_2012}. Strong nucleosome arrays became visible in all classes after realignment, with three out of four showing strong asymmetry in addition.
 
 \section{Conclusions}
 
-SPar-K is a useful partitioning method for moderately misaligned and randomly oriented chromatin regions. Compared to existing methods, it is competitive in terms of accuracy, superior in speed, applicable to a wider range of input signals (not restricted to count data) and easier to use.
+SPar-K is a useful partitioning method for moderately misaligned and randomly oriented chromatin regions. Compared to existing methods, it is competitive in terms of accuracy, superior in speed, applicable to a wider range of input signals (not restricted to count data) and easy to use.
diff --git a/my_thesis.aux b/my_thesis.aux
index c06422c..410dd7a 100644
--- a/my_thesis.aux
+++ b/my_thesis.aux
@@ -1,194 +1,194 @@
 \relax 
 \providecommand\hyper@newdestlabel[2]{}
 \providecommand\BKM@entry[2]{}
 \catcode `:\active 
 \catcode `;\active 
 \catcode `!\active 
 \catcode `?\active 
 \catcode `"\active 
 \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
 \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
 \global\let\oldcontentsline\contentsline
 \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
 \global\let\oldnewlabel\newlabel
 \gdef\newlabel#1#2{\newlabelxx{#1}#2}
 \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
 \AtEndDocument{\ifx\hyper@anchor\@undefined
 \let\contentsline\oldcontentsline
 \let\newlabel\oldnewlabel
 \fi}
 \fi}
 \global\let\hyper@last\relax 
 \gdef\HyperFirstAtBeginDocument#1{#1}
 \providecommand\HyField@AuxAddToFields[1]{}
 \providecommand\HyField@AuxAddToCoFields[2]{}
 \providecommand \oddpage@label [2]{}
 \babel@aux{english}{}
 \babel@aux{french}{}
 \babel@aux{english}{}
 \@input{head/dedication.aux}
 \@input{head/acknowledgements.aux}
 \BKM@entry{id=1,dest={636861707465722A2E31},srcline={3}}{41636B6E6F776C656467656D656E7473}
 \pgfsyspdfmark {pgfid2}{0}{40463552}
 \pgfsyspdfmark {pgfid1}{6}{40498788}
 \@input{head/abstracts.aux}
 \BKM@entry{id=2,dest={636861707465722A2E32},srcline={9}}{4162737472616374205C28456E676C6973682F4672616E5C3334376169732F446575747363685C29}
 \pgfsyspdfmark {pgfid4}{0}{40463552}
 \pgfsyspdfmark {pgfid3}{6}{40498788}
 \pgfsyspdfmark {pgfid6}{0}{40463552}
 \pgfsyspdfmark {pgfid5}{6}{40498788}
 \BKM@entry{id=3,dest={746F632E30},srcline={30}}{436F6E74656E7473}
 \pgfsyspdfmark {pgfid8}{0}{40463552}
 \pgfsyspdfmark {pgfid7}{6}{40498788}
 \@input{main/ch_introduction.aux}
 \BKM@entry{id=4,dest={636861707465722E31},srcline={2}}{496E74726F64756374696F6E}
 \BKM@entry{id=5,dest={636861707465722E31},srcline={5}}{496E74726F64756374696F6E}
 \BKM@entry{id=6,dest={73656374696F6E2E312E31},srcline={13}}{41626F7574206368726F6D6174696E}
 \BKM@entry{id=7,dest={73756273656374696F6E2E312E312E31},srcline={18}}{546865206368726F6D6174696E20737472756374757265}
 \pgfsyspdfmark {pgfid10}{0}{40463552}
 \pgfsyspdfmark {pgfid9}{6}{40511883}
 \BKM@entry{id=8,dest={73756273656374696F6E2E312E312E32},srcline={42}}{546865206368726F6D6174696E2069732064796E616D6963}
 \BKM@entry{id=9,dest={73756273656374696F6E2E312E312E33},srcline={52}}{41626F7574206E75636C656F736F6D6520706F736974696F6E696E67}
 \BKM@entry{id=10,dest={73656374696F6E2E312E32},srcline={79}}{41626F7574207472616E736372697074696F6E20666163746F7273}
 \BKM@entry{id=11,dest={73756273656374696F6E2E312E322E31},srcline={95}}{544620636F2D62696E64696E67}
 \BKM@entry{id=12,dest={73656374696F6E2E312E33},srcline={126}}{47656E6520726567756C6174696F6E20696E2061206E75747368656C6C}
 \BKM@entry{id=13,dest={73756273656374696F6E2E312E332E31},srcline={139}}{546865206368726F6D6174696E2062617272696572}
 \BKM@entry{id=14,dest={73756273656374696F6E2E312E332E32},srcline={144}}{54467320636F6F70657261746976652062696E64696E67}
 \BKM@entry{id=15,dest={73756273656374696F6E2E312E332E33},srcline={151}}{50696F6E65657220544673}
 \BKM@entry{id=16,dest={73756273656374696F6E2E312E332E34},srcline={162}}{526567756C61746F727920656C656D656E7473}
 \BKM@entry{id=17,dest={73756273656374696F6E2E312E332E35},srcline={175}}{5468652067656E6F6D6520676F6573203344}
 \BKM@entry{id=18,dest={73656374696F6E2E312E34},srcline={186}}{4D6561737572696E67206368726F6D6174696E206665617475726573}
 \BKM@entry{id=19,dest={73756273656374696F6E2E312E342E31},srcline={191}}{4D6561737572696E672054462062696E64696E6720696E207669766F}
 \BKM@entry{id=20,dest={73756273656374696F6E2E312E342E32},srcline={203}}{4D6561737572696E672054462062696E64696E6720696E20766974726F}
 \BKM@entry{id=21,dest={73756273656374696F6E2E312E342E33},srcline={216}}{4D6561737572696E67206E75636C656F736F6D65206F63637570616E6379}
 \BKM@entry{id=22,dest={73756273656374696F6E2E312E342E34},srcline={232}}{4469676974616C20666F6F747072696E74696E67}
 \BKM@entry{id=23,dest={73656374696F6E2E312E35},srcline={264}}{4D6F64656C696E672073657175656E6365207370656369666963697479}
 \BKM@entry{id=24,dest={73756273656374696F6E2E312E352E31},srcline={329}}{416C69676E696E672062696E64696E67207369746573}
 \BKM@entry{id=25,dest={73756273656374696F6E2E312E352E32},srcline={337}}{506C6174697475646573}
 \BKM@entry{id=26,dest={73756273656374696F6E2E312E352E33},srcline={356}}{50726564696374696E672062696E64696E67207369746573}
 \BKM@entry{id=27,dest={73656374696F6E2E312E36},srcline={389}}{4F7665722D726570726573656E746564207061747465726E7320646973636F76657279}
 \@input{main/ch_lab_resources.aux}
 \BKM@entry{id=28,dest={636861707465722E32},srcline={2}}{4C61626F7261746F7279207265736F7572636573}
 \BKM@entry{id=29,dest={636861707465722E32},srcline={5}}{4C61626F7261746F7279207265736F7572636573}
 \BKM@entry{id=30,dest={73656374696F6E2E322E31},srcline={14}}{4D6173732047656E6F6D6520416E6E6F746174696F6E207265706F7369746F7279}
 \pgfsyspdfmark {pgfid13}{0}{40463552}
 \pgfsyspdfmark {pgfid12}{6}{40511883}
 \BKM@entry{id=31,dest={73756273656374696F6E2E322E312E31},srcline={21}}{4D474120636F6E74656E7420616E64206F7267616E697A6174696F6E}
 \BKM@entry{id=32,dest={73756273656374696F6E2E322E312E32},srcline={51}}{436F6E636C7573696F6E73}
 \BKM@entry{id=33,dest={73656374696F6E2E322E32},srcline={57}}{45756B6172796F7469632050726F6D6F746572204461746162617365}
 \BKM@entry{id=34,dest={73756273656374696F6E2E322E322E31},srcline={75}}{4550446E6577206E6F7720616E6E6F7461746573205C28736F6D65206F665C2920796F7572206D757368726F6F6D7320616E6420766567657461626C6573}
 \BKM@entry{id=35,dest={73756273656374696F6E2E322E322E32},srcline={109}}{496E63726561736564206D617070696E6720707265636973696F6E20696E2068756D616E}
 \BKM@entry{id=36,dest={73756273656374696F6E2E322E322E33},srcline={121}}{496E746567726174696F6E206F66204550446E65772077697468206F74686572207265736F7572636573}
 \BKM@entry{id=37,dest={73756273656374696F6E2E322E322E34},srcline={127}}{436F6E636C7573696F6E73}
 \BKM@entry{id=38,dest={73756273656374696F6E2E322E322E35},srcline={133}}{4D6574686F6473}
 \@input{main/ch_encode_peaks.aux}
 \BKM@entry{id=39,dest={636861707465722E33},srcline={2}}{454E434F4445207065616B7320616E616C79736973}
 \BKM@entry{id=40,dest={636861707465722E33},srcline={5}}{454E434F4445207065616B7320616E616C79736973}
 \BKM@entry{id=41,dest={73656374696F6E2E332E31},srcline={19}}{44617461}
 \pgfsyspdfmark {pgfid15}{0}{40463552}
 \pgfsyspdfmark {pgfid14}{6}{40511883}
 \BKM@entry{id=42,dest={73656374696F6E2E332E32},srcline={45}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206368726F6D6174696E2061726368697465637475726573}
 \BKM@entry{id=43,dest={73756273656374696F6E2E332E322E31},srcline={86}}{44617461207265616C69676E6D656E74}
 \BKM@entry{id=44,dest={73656374696F6E2E332E33},srcline={98}}{4E75636C656F736F6D65206F7267616E697A6174696F6E2061726F756E64207472616E736372697074696F6E20666163746F722062696E64696E67207369746573}
 \BKM@entry{id=45,dest={73656374696F6E2E332E34},srcline={126}}{5468652063617365206F6620435443462C2052414432312C20534D43332C2059593120616E64205A4E46313433}
 \BKM@entry{id=46,dest={73656374696F6E2E332E35},srcline={165}}{4354434620616E64204A756E4420696E7465726163746F6D6573}
 \BKM@entry{id=47,dest={73656374696F6E2E332E36},srcline={258}}{454246312062696E6473206E75636C656F736F6D6573}
 \BKM@entry{id=48,dest={73656374696F6E2E332E37},srcline={295}}{44697363757373696F6E}
 \BKM@entry{id=49,dest={73656374696F6E2E332E38},srcline={311}}{4D6574686F6473}
 \BKM@entry{id=50,dest={73756273656374696F6E2E332E382E31},srcline={313}}{4461746120616E6420646174612070726F63657373696E67}
 \BKM@entry{id=51,dest={73756273656374696F6E2E332E382E32},srcline={326}}{436C617373696669636174696F6E206F66204D4E617365207061747465726E73}
 \BKM@entry{id=52,dest={73756273656374696F6E2E332E382E33},srcline={339}}{5175616E74696679696E67206E75636C656F736F6D6520617272617920696E74656E736974792066726F6D20636C617373696669636174696F6E20726573756C7473}
 \BKM@entry{id=53,dest={73756273656374696F6E2E332E382E34},srcline={368}}{5065616B20636F6C6F63616C697A6174696F6E}
 \BKM@entry{id=54,dest={73756273656374696F6E2E332E382E35},srcline={372}}{4E445220646574656374696F6E}
 \BKM@entry{id=55,dest={73756273656374696F6E2E332E382E36},srcline={464}}{4354434620616E64204A756E4420696E7465726163746F7273}
 \BKM@entry{id=56,dest={73756273656374696F6E2E332E382E37},srcline={476}}{4542463120616E64206E75636C656F736F6D65}
 \@input{main/ch_spark.aux}
 \BKM@entry{id=57,dest={636861707465722E34},srcline={2}}{535061722D4B}
 \BKM@entry{id=58,dest={73656374696F6E2E342E31},srcline={15}}{416C676F726974686D}
 \pgfsyspdfmark {pgfid17}{0}{40463552}
 \pgfsyspdfmark {pgfid16}{6}{40511883}
 \BKM@entry{id=59,dest={73656374696F6E2E342E32},srcline={35}}{496D706C656D656E746174696F6E}
 \BKM@entry{id=60,dest={73656374696F6E2E342E33},srcline={39}}{42656E63686D61726B696E67}
-\BKM@entry{id=61,dest={73756273656374696F6E2E342E332E31},srcline={73}}{4B2D6D65616E73}
-\BKM@entry{id=62,dest={73756273656374696F6E2E342E332E32},srcline={76}}{43684950506172746974696F6E696E67}
-\BKM@entry{id=63,dest={73756273656374696F6E2E342E332E33},srcline={79}}{44617461}
-\BKM@entry{id=64,dest={73756273656374696F6E2E342E332E34},srcline={91}}{506572666F726D616E636573}
-\BKM@entry{id=65,dest={73656374696F6E2E342E34},srcline={94}}{506172746974696F6E206F6620444E61736520616E64204D4E6173652064617461}
-\BKM@entry{id=66,dest={73656374696F6E2E342E35},srcline={117}}{436F6E636C7573696F6E73}
+\BKM@entry{id=61,dest={73756273656374696F6E2E342E332E31},srcline={74}}{4B2D6D65616E73}
+\BKM@entry{id=62,dest={73756273656374696F6E2E342E332E32},srcline={77}}{43684950506172746974696F6E696E67}
+\BKM@entry{id=63,dest={73756273656374696F6E2E342E332E33},srcline={80}}{44617461}
+\BKM@entry{id=64,dest={73756273656374696F6E2E342E332E34},srcline={92}}{506572666F726D616E636573}
+\BKM@entry{id=65,dest={73656374696F6E2E342E34},srcline={95}}{506172746974696F6E206F6620444E61736520616E64204D4E6173652064617461}
+\BKM@entry{id=66,dest={73656374696F6E2E342E35},srcline={119}}{436F6E636C7573696F6E73}
 \@input{main/ch_smile-seq.aux}
 \BKM@entry{id=67,dest={636861707465722E35},srcline={2}}{534D694C452D736571206461746120616E616C79736973}
 \BKM@entry{id=68,dest={636861707465722E35},srcline={5}}{534D694C452D736571206461746120616E616C79736973}
 \BKM@entry{id=69,dest={73656374696F6E2E352E31},srcline={19}}{496E74726F64756374696F6E}
 \pgfsyspdfmark {pgfid19}{0}{40463552}
 \pgfsyspdfmark {pgfid18}{6}{40511883}
 \BKM@entry{id=70,dest={73656374696F6E2E352E32},srcline={36}}{48696464656E204D61726B6F76204D6F64656C204D6F74696620646973636F76657279}
 \BKM@entry{id=71,dest={73656374696F6E2E352E33},srcline={61}}{42696E64696E67206D6F746966206576616C756174696F6E}
 \BKM@entry{id=72,dest={73656374696F6E2E352E34},srcline={114}}{526573756C7473}
 \BKM@entry{id=73,dest={73656374696F6E2E352E35},srcline={134}}{436F6E636C7573696F6E73}
 \@input{main/ch_pwmscan.aux}
 \BKM@entry{id=74,dest={636861707465722E36},srcline={2}}{50574D5363616E}
 \BKM@entry{id=75,dest={73656374696F6E2E362E31},srcline={24}}{416C676F726974686D73}
 \BKM@entry{id=76,dest={73756273656374696F6E2E362E312E31},srcline={28}}{5363616E6E657220616C676F726974686D}
 \pgfsyspdfmark {pgfid21}{0}{40463552}
 \pgfsyspdfmark {pgfid20}{6}{40511883}
 \BKM@entry{id=77,dest={73756273656374696F6E2E362E312E32},srcline={34}}{4D61746368657320656E756D65726174696F6E20616E64206D617070696E67}
 \BKM@entry{id=78,dest={73656374696F6E2E362E32},srcline={46}}{504D575363616E20617263686974656374757265}
 \BKM@entry{id=79,dest={73656374696F6E2E362E33},srcline={81}}{42656E63686D61726B}
 \BKM@entry{id=80,dest={73656374696F6E2E362E34},srcline={153}}{436F6E636C7573696F6E73}
 \@input{main/ch_atac-seq.aux}
 \BKM@entry{id=81,dest={636861707465722E37},srcline={2}}{4368726F6D6174696E206163636573736962696C697479206F66206D6F6E6F6379746573}
 \BKM@entry{id=82,dest={73656374696F6E2E372E31},srcline={9}}{4D6F6E69746F72696E672054462062696E64696E67}
 \pgfsyspdfmark {pgfid23}{0}{40463552}
 \pgfsyspdfmark {pgfid22}{6}{40511883}
 \BKM@entry{id=83,dest={73656374696F6E2E372E32},srcline={18}}{54686520616476656E74206F662073696E676C652063656C6C20444746}
 \BKM@entry{id=84,dest={73656374696F6E2E372E33},srcline={44}}{4F70656E20697373756573}
 \BKM@entry{id=85,dest={73656374696F6E2E372E34},srcline={48}}{44617461}
 \BKM@entry{id=86,dest={73656374696F6E2E372E35},srcline={59}}{4964656E74696679696E67206F7665722D726570726573656E746564207369676E616C73}
 \BKM@entry{id=87,dest={73756273656374696F6E2E372E352E31},srcline={63}}{43684950506172746974696F6E696E6720616C676F726974686D}
 \BKM@entry{id=88,dest={73756273656374696F6E2E372E352E32},srcline={75}}{454D53657175656E636520616C676F726974686D}
 \BKM@entry{id=89,dest={73756273656374696F6E2E372E352E33},srcline={175}}{454D4A6F696E7420616C676F726974686D}
 \BKM@entry{id=90,dest={73756273656374696F6E2E372E352E34},srcline={212}}{44617461207265616C69676E6D656E74}
 \BKM@entry{id=91,dest={73756273656374696F6E2E372E352E35},srcline={225}}{536F6674206167677265676174696F6E20706C6F7473}
 \BKM@entry{id=92,dest={73656374696F6E2E372E36},srcline={235}}{446174612070726F63657373696E67}
 \BKM@entry{id=93,dest={73656374696F6E2E372E37},srcline={245}}{526573756C7473}
 \BKM@entry{id=94,dest={73756273656374696F6E2E372E372E31},srcline={249}}{416C69676E696E67207468652062696E64696E67207369746573}
 \BKM@entry{id=95,dest={73756273656374696F6E2E372E372E32},srcline={276}}{4578706C6F72696E6720696E646976696475616C20544620636C6173736573}
 \BKM@entry{id=96,dest={73656374696F6E2E372E38},srcline={292}}{44697363757373696F6E73}
 \BKM@entry{id=97,dest={73656374696F6E2E372E39},srcline={302}}{506572737065637469766573}
 \BKM@entry{id=98,dest={73656374696F6E2E372E3130},srcline={312}}{4D6574686F6473}
 \BKM@entry{id=99,dest={73756273656374696F6E2E372E31302E31},srcline={314}}{436F646520617661696C6162696C697479}
 \BKM@entry{id=100,dest={73756273656374696F6E2E372E31302E32},srcline={318}}{4461746120736F7572636573}
 \BKM@entry{id=101,dest={73756273656374696F6E2E372E31302E33},srcline={329}}{4461746120706F73742D70726F63657373696E67}
 \BKM@entry{id=102,dest={73756273656374696F6E2E372E31302E34},srcline={339}}{4D6F64656C20657874656E73696F6E}
 \BKM@entry{id=103,dest={73756273656374696F6E2E372E31302E35},srcline={351}}{45787472616374696E6720646174612061737369676E656420746F206120636C617373}
 \BKM@entry{id=104,dest={73756273656374696F6E2E372E31302E36},srcline={430}}{50726F6772616D73}
 \BKM@entry{id=105,dest={73756273656374696F6E2E372E31302E37},srcline={460}}{467261676D656E7420636C6173736573}
 \BKM@entry{id=106,dest={73756273656374696F6E2E372E31302E38},srcline={480}}{53696D756C617465642073657175656E636573}
 \BKM@entry{id=107,dest={73756273656374696F6E2E372E31302E39},srcline={483}}{42696E64696E6720736974652070726564696374696F6E}
 \BKM@entry{id=108,dest={73756273656374696F6E2E372E31302E3130},srcline={487}}{5265616C69676E6D656E74207573696E67204A4153504152206D6F74696673}
 \BKM@entry{id=109,dest={73756273656374696F6E2E372E31302E3131},srcline={551}}{506572205446207375622D636C6173736573}
 \@input{main/ch_discussion.aux}
 \BKM@entry{id=110,dest={636861707465722E38},srcline={2}}{44697363757373696F6E}
 \BKM@entry{id=111,dest={636861707465722E38},srcline={5}}{44697363757373696F6E73}
 \pgfsyspdfmark {pgfid25}{0}{40463552}
 \pgfsyspdfmark {pgfid24}{6}{40511883}
 \@writefile{toc}{\vspace  {\normalbaselineskip }}
 \@input{tail/appendix.aux}
 \BKM@entry{id=112,dest={617070656E6469782E41},srcline={5}}{537570706C656D656E74617279206D6174657269616C}
 \BKM@entry{id=113,dest={73656374696F6E2E412E31},srcline={9}}{454E434F4445207065616B7320616E616C7973697320737570706C656D656E74617279206D6174657269616C}
 \pgfsyspdfmark {pgfid27}{0}{40463552}
 \pgfsyspdfmark {pgfid26}{-2013849}{40511883}
 \BKM@entry{id=114,dest={73656374696F6E2E412E32},srcline={102}}{535061722D4B20737570706C656D656E74617279206D6174657269616C}
 \BKM@entry{id=115,dest={73656374696F6E2E412E33},srcline={522}}{534D694C452D73657120737570706C656D656E74617279206D6174657269616C}
 \BKM@entry{id=116,dest={73656374696F6E2E412E34},srcline={534}}{4368726F6D6174696E206163636573736962696C697479206F66206D6F6E6F637974657320737570706C656D656E74617279206D6174657269616C}
 \BKM@entry{id=117,dest={73756273656374696F6E2E412E342E31},srcline={536}}{467261676D656E742073697A6520616E616C79736973}
 \BKM@entry{id=118,dest={73756273656374696F6E2E412E342E32},srcline={563}}{4D6561737572696E67206F70656E206368726F6D6174696E20616E64206E75636C656F736F6D65206F63637570616E6379}
 \BKM@entry{id=119,dest={73756273656374696F6E2E412E342E33},srcline={594}}{4576616C756174696F6E206F6620454D53657175656E636520616E642043684950506172746974696F6E696E67}
 \BKM@entry{id=120,dest={73756273656374696F6E2E412E342E34},srcline={694}}{4F7468657220737570706C656D656E746172792066696775726573}
 \@input{tail/biblio.aux}
 \BKM@entry{id=121,dest={73656374696F6E2A2E3634},srcline={3}}{4269626C696F677261706879}
 \pgfsyspdfmark {pgfid29}{0}{40463552}
 \pgfsyspdfmark {pgfid28}{6}{40498788}
 \BKM@entry{id=122,dest={617070656E6469782A2E3635},srcline={6}}{4269626C696F677261706879}
 \@input{tail/cv.aux}
 \BKM@entry{id=123,dest={73656374696F6E2A2E3636},srcline={4}}{437572726963756C756D205669746165}
diff --git a/my_thesis.blg b/my_thesis.blg
index ad60a71..88100ab 100644
--- a/my_thesis.blg
+++ b/my_thesis.blg
@@ -1,63 +1,62 @@
 This is BibTeX, Version 0.99d (TeX Live 2017/Debian)
 Capacity: max_strings=100000, hash_size=100000, hash_prime=85009
 The top-level auxiliary file: my_thesis.aux
 A level-1 auxiliary file: head/dedication.aux
 A level-1 auxiliary file: head/acknowledgements.aux
 A level-1 auxiliary file: head/abstracts.aux
 A level-1 auxiliary file: main/ch_introduction.aux
 A level-1 auxiliary file: main/ch_lab_resources.aux
 A level-1 auxiliary file: main/ch_encode_peaks.aux
 A level-1 auxiliary file: main/ch_spark.aux
 A level-1 auxiliary file: main/ch_smile-seq.aux
 A level-1 auxiliary file: main/ch_pwmscan.aux
 A level-1 auxiliary file: main/ch_atac-seq.aux
 A level-1 auxiliary file: main/ch_discussion.aux
 A level-1 auxiliary file: tail/appendix.aux
 A level-1 auxiliary file: tail/biblio.aux
 The style file: apalike.bst
 A level-1 auxiliary file: tail/cv.aux
 Database file #1: tail/bibliography.bib
 Warning--I didn't find a database entry for ""
-Warning--I didn't find a database entry for "nielsen_catchprofiles"
 You've used 150 entries,
             1935 wiz_defined-function locations,
-            1236 strings with 45997 characters,
+            1235 strings with 45976 characters,
 and the built_in function-call counts, 86730 in all, are:
 = -- 7865
 > -- 6234
 < -- 58
 + -- 2349
 - -- 2316
 * -- 8872
 := -- 15368
 add.period$ -- 454
 call.type$ -- 150
 change.case$ -- 1906
 chr.to.int$ -- 149
 cite$ -- 150
 duplicate$ -- 2170
 empty$ -- 4530
 format.name$ -- 2538
 if$ -- 16042
 int.to.chr$ -- 2
 int.to.str$ -- 0
 missing$ -- 151
 newline$ -- 752
 num.names$ -- 458
 pop$ -- 1586
 preamble$ -- 1
 purify$ -- 1906
 quote$ -- 0
 skip$ -- 1619
 stack$ -- 0
 substring$ -- 5452
 swap$ -- 211
 text.length$ -- 10
 text.prefix$ -- 0
 top$ -- 0
 type$ -- 896
 warning$ -- 0
 while$ -- 575
 width$ -- 0
 write$ -- 1960
-(There were 2 warnings)
+(There was 1 warning)
diff --git a/my_thesis.log b/my_thesis.log
index 68ff325..1ea5105 100644
--- a/my_thesis.log
+++ b/my_thesis.log
@@ -1,4059 +1,4051 @@
-This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12)  16 JAN 2020 16:14
+This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12)  17 JAN 2020 17:01
 entering extended mode
  restricted \write18 enabled.
  %&-line parsing enabled.
 **my_thesis.tex
 (./my_thesis.tex
 LaTeX2e <2017-04-15>
 Babel <3.18> and hyphenation patterns for 84 language(s) loaded.
 (./head/settings_epfl_template.tex
 (/usr/share/texlive/texmf-dist/tex/latex/base/book.cls
 Document Class: book 2014/09/29 v1.4h Standard LaTeX document class
 (/usr/share/texlive/texmf-dist/tex/latex/base/bk11.clo
 File: bk11.clo 2014/09/29 v1.4h Standard LaTeX file (size option)
 )
 \c@part=\count79
 \c@chapter=\count80
 \c@section=\count81
 \c@subsection=\count82
 \c@subsubsection=\count83
 \c@paragraph=\count84
 \c@subparagraph=\count85
 \c@figure=\count86
 \c@table=\count87
 \abovecaptionskip=\skip41
 \belowcaptionskip=\skip42
 \bibindent=\dimen102
 )
 (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty
 Package: fontenc 2017/04/05 v2.0i Standard LaTeX package
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def
 File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file
 LaTeX Font Info:    Redeclaring font encoding T1 on input line 48.
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty
 Package: inputenc 2015/03/17 v1.2c Input encoding file
 \inpenc@prehook=\toks14
 \inpenc@posthook=\toks15
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def
 File: utf8.def 2017/01/28 v1.1t UTF-8 support for inputenc
 Now handling font encoding OML ...
 ... no UTF-8 mapping file for font encoding OML
 Now handling font encoding T1 ...
 ... processing UTF-8 mapping file for font encoding T1
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu
 File: t1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc
    defining Unicode char U+00A0 (decimal 160)
    defining Unicode char U+00A1 (decimal 161)
    defining Unicode char U+00A3 (decimal 163)
    defining Unicode char U+00AB (decimal 171)
    defining Unicode char U+00AD (decimal 173)
    defining Unicode char U+00BB (decimal 187)
    defining Unicode char U+00BF (decimal 191)
    defining Unicode char U+00C0 (decimal 192)
    defining Unicode char U+00C1 (decimal 193)
    defining Unicode char U+00C2 (decimal 194)
    defining Unicode char U+00C3 (decimal 195)
    defining Unicode char U+00C4 (decimal 196)
    defining Unicode char U+00C5 (decimal 197)
    defining Unicode char U+00C6 (decimal 198)
    defining Unicode char U+00C7 (decimal 199)
    defining Unicode char U+00C8 (decimal 200)
    defining Unicode char U+00C9 (decimal 201)
    defining Unicode char U+00CA (decimal 202)
    defining Unicode char U+00CB (decimal 203)
    defining Unicode char U+00CC (decimal 204)
    defining Unicode char U+00CD (decimal 205)
    defining Unicode char U+00CE (decimal 206)
    defining Unicode char U+00CF (decimal 207)
    defining Unicode char U+00D0 (decimal 208)
    defining Unicode char U+00D1 (decimal 209)
    defining Unicode char U+00D2 (decimal 210)
    defining Unicode char U+00D3 (decimal 211)
    defining Unicode char U+00D4 (decimal 212)
    defining Unicode char U+00D5 (decimal 213)
    defining Unicode char U+00D6 (decimal 214)
    defining Unicode char U+00D8 (decimal 216)
    defining Unicode char U+00D9 (decimal 217)
    defining Unicode char U+00DA (decimal 218)
    defining Unicode char U+00DB (decimal 219)
    defining Unicode char U+00DC (decimal 220)
    defining Unicode char U+00DD (decimal 221)
    defining Unicode char U+00DE (decimal 222)
    defining Unicode char U+00DF (decimal 223)
    defining Unicode char U+00E0 (decimal 224)
    defining Unicode char U+00E1 (decimal 225)
    defining Unicode char U+00E2 (decimal 226)
    defining Unicode char U+00E3 (decimal 227)
    defining Unicode char U+00E4 (decimal 228)
    defining Unicode char U+00E5 (decimal 229)
    defining Unicode char U+00E6 (decimal 230)
    defining Unicode char U+00E7 (decimal 231)
    defining Unicode char U+00E8 (decimal 232)
    defining Unicode char U+00E9 (decimal 233)
    defining Unicode char U+00EA (decimal 234)
    defining Unicode char U+00EB (decimal 235)
    defining Unicode char U+00EC (decimal 236)
    defining Unicode char U+00ED (decimal 237)
    defining Unicode char U+00EE (decimal 238)
    defining Unicode char U+00EF (decimal 239)
    defining Unicode char U+00F0 (decimal 240)
    defining Unicode char U+00F1 (decimal 241)
    defining Unicode char U+00F2 (decimal 242)
    defining Unicode char U+00F3 (decimal 243)
    defining Unicode char U+00F4 (decimal 244)
    defining Unicode char U+00F5 (decimal 245)
    defining Unicode char U+00F6 (decimal 246)
    defining Unicode char U+00F8 (decimal 248)
    defining Unicode char U+00F9 (decimal 249)
    defining Unicode char U+00FA (decimal 250)
    defining Unicode char U+00FB (decimal 251)
    defining Unicode char U+00FC (decimal 252)
    defining Unicode char U+00FD (decimal 253)
    defining Unicode char U+00FE (decimal 254)
    defining Unicode char U+00FF (decimal 255)
    defining Unicode char U+0100 (decimal 256)
    defining Unicode char U+0101 (decimal 257)
    defining Unicode char U+0102 (decimal 258)
    defining Unicode char U+0103 (decimal 259)
    defining Unicode char U+0104 (decimal 260)
    defining Unicode char U+0105 (decimal 261)
    defining Unicode char U+0106 (decimal 262)
    defining Unicode char U+0107 (decimal 263)
    defining Unicode char U+0108 (decimal 264)
    defining Unicode char U+0109 (decimal 265)
    defining Unicode char U+010A (decimal 266)
    defining Unicode char U+010B (decimal 267)
    defining Unicode char U+010C (decimal 268)
    defining Unicode char U+010D (decimal 269)
    defining Unicode char U+010E (decimal 270)
    defining Unicode char U+010F (decimal 271)
    defining Unicode char U+0110 (decimal 272)
    defining Unicode char U+0111 (decimal 273)
    defining Unicode char U+0112 (decimal 274)
    defining Unicode char U+0113 (decimal 275)
    defining Unicode char U+0114 (decimal 276)
    defining Unicode char U+0115 (decimal 277)
    defining Unicode char U+0116 (decimal 278)
    defining Unicode char U+0117 (decimal 279)
    defining Unicode char U+0118 (decimal 280)
    defining Unicode char U+0119 (decimal 281)
    defining Unicode char U+011A (decimal 282)
    defining Unicode char U+011B (decimal 283)
    defining Unicode char U+011C (decimal 284)
    defining Unicode char U+011D (decimal 285)
    defining Unicode char U+011E (decimal 286)
    defining Unicode char U+011F (decimal 287)
    defining Unicode char U+0120 (decimal 288)
    defining Unicode char U+0121 (decimal 289)
    defining Unicode char U+0122 (decimal 290)
    defining Unicode char U+0123 (decimal 291)
    defining Unicode char U+0124 (decimal 292)
    defining Unicode char U+0125 (decimal 293)
    defining Unicode char U+0128 (decimal 296)
    defining Unicode char U+0129 (decimal 297)
    defining Unicode char U+012A (decimal 298)
    defining Unicode char U+012B (decimal 299)
    defining Unicode char U+012C (decimal 300)
    defining Unicode char U+012D (decimal 301)
    defining Unicode char U+012E (decimal 302)
    defining Unicode char U+012F (decimal 303)
    defining Unicode char U+0130 (decimal 304)
    defining Unicode char U+0131 (decimal 305)
    defining Unicode char U+0132 (decimal 306)
    defining Unicode char U+0133 (decimal 307)
    defining Unicode char U+0134 (decimal 308)
    defining Unicode char U+0135 (decimal 309)
    defining Unicode char U+0136 (decimal 310)
    defining Unicode char U+0137 (decimal 311)
    defining Unicode char U+0139 (decimal 313)
    defining Unicode char U+013A (decimal 314)
    defining Unicode char U+013B (decimal 315)
    defining Unicode char U+013C (decimal 316)
    defining Unicode char U+013D (decimal 317)
    defining Unicode char U+013E (decimal 318)
    defining Unicode char U+0141 (decimal 321)
    defining Unicode char U+0142 (decimal 322)
    defining Unicode char U+0143 (decimal 323)
    defining Unicode char U+0144 (decimal 324)
    defining Unicode char U+0145 (decimal 325)
    defining Unicode char U+0146 (decimal 326)
    defining Unicode char U+0147 (decimal 327)
    defining Unicode char U+0148 (decimal 328)
    defining Unicode char U+014A (decimal 330)
    defining Unicode char U+014B (decimal 331)
    defining Unicode char U+014C (decimal 332)
    defining Unicode char U+014D (decimal 333)
    defining Unicode char U+014E (decimal 334)
    defining Unicode char U+014F (decimal 335)
    defining Unicode char U+0150 (decimal 336)
    defining Unicode char U+0151 (decimal 337)
    defining Unicode char U+0152 (decimal 338)
    defining Unicode char U+0153 (decimal 339)
    defining Unicode char U+0154 (decimal 340)
    defining Unicode char U+0155 (decimal 341)
    defining Unicode char U+0156 (decimal 342)
    defining Unicode char U+0157 (decimal 343)
    defining Unicode char U+0158 (decimal 344)
    defining Unicode char U+0159 (decimal 345)
    defining Unicode char U+015A (decimal 346)
    defining Unicode char U+015B (decimal 347)
    defining Unicode char U+015C (decimal 348)
    defining Unicode char U+015D (decimal 349)
    defining Unicode char U+015E (decimal 350)
    defining Unicode char U+015F (decimal 351)
    defining Unicode char U+0160 (decimal 352)
    defining Unicode char U+0161 (decimal 353)
    defining Unicode char U+0162 (decimal 354)
    defining Unicode char U+0163 (decimal 355)
    defining Unicode char U+0164 (decimal 356)
    defining Unicode char U+0165 (decimal 357)
    defining Unicode char U+0168 (decimal 360)
    defining Unicode char U+0169 (decimal 361)
    defining Unicode char U+016A (decimal 362)
    defining Unicode char U+016B (decimal 363)
    defining Unicode char U+016C (decimal 364)
    defining Unicode char U+016D (decimal 365)
    defining Unicode char U+016E (decimal 366)
    defining Unicode char U+016F (decimal 367)
    defining Unicode char U+0170 (decimal 368)
    defining Unicode char U+0171 (decimal 369)
    defining Unicode char U+0172 (decimal 370)
    defining Unicode char U+0173 (decimal 371)
    defining Unicode char U+0174 (decimal 372)
    defining Unicode char U+0175 (decimal 373)
    defining Unicode char U+0176 (decimal 374)
    defining Unicode char U+0177 (decimal 375)
    defining Unicode char U+0178 (decimal 376)
    defining Unicode char U+0179 (decimal 377)
    defining Unicode char U+017A (decimal 378)
    defining Unicode char U+017B (decimal 379)
    defining Unicode char U+017C (decimal 380)
    defining Unicode char U+017D (decimal 381)
    defining Unicode char U+017E (decimal 382)
    defining Unicode char U+01CD (decimal 461)
    defining Unicode char U+01CE (decimal 462)
    defining Unicode char U+01CF (decimal 463)
    defining Unicode char U+01D0 (decimal 464)
    defining Unicode char U+01D1 (decimal 465)
    defining Unicode char U+01D2 (decimal 466)
    defining Unicode char U+01D3 (decimal 467)
    defining Unicode char U+01D4 (decimal 468)
    defining Unicode char U+01E2 (decimal 482)
    defining Unicode char U+01E3 (decimal 483)
    defining Unicode char U+01E6 (decimal 486)
    defining Unicode char U+01E7 (decimal 487)
    defining Unicode char U+01E8 (decimal 488)
    defining Unicode char U+01E9 (decimal 489)
    defining Unicode char U+01EA (decimal 490)
    defining Unicode char U+01EB (decimal 491)
    defining Unicode char U+01F0 (decimal 496)
    defining Unicode char U+01F4 (decimal 500)
    defining Unicode char U+01F5 (decimal 501)
    defining Unicode char U+0218 (decimal 536)
    defining Unicode char U+0219 (decimal 537)
    defining Unicode char U+021A (decimal 538)
    defining Unicode char U+021B (decimal 539)
    defining Unicode char U+0232 (decimal 562)
    defining Unicode char U+0233 (decimal 563)
    defining Unicode char U+1E02 (decimal 7682)
    defining Unicode char U+1E03 (decimal 7683)
    defining Unicode char U+200C (decimal 8204)
    defining Unicode char U+2010 (decimal 8208)
    defining Unicode char U+2011 (decimal 8209)
    defining Unicode char U+2012 (decimal 8210)
    defining Unicode char U+2013 (decimal 8211)
    defining Unicode char U+2014 (decimal 8212)
    defining Unicode char U+2015 (decimal 8213)
    defining Unicode char U+2018 (decimal 8216)
    defining Unicode char U+2019 (decimal 8217)
    defining Unicode char U+201A (decimal 8218)
    defining Unicode char U+201C (decimal 8220)
    defining Unicode char U+201D (decimal 8221)
    defining Unicode char U+201E (decimal 8222)
    defining Unicode char U+2030 (decimal 8240)
    defining Unicode char U+2031 (decimal 8241)
    defining Unicode char U+2039 (decimal 8249)
    defining Unicode char U+203A (decimal 8250)
    defining Unicode char U+2423 (decimal 9251)
    defining Unicode char U+1E20 (decimal 7712)
    defining Unicode char U+1E21 (decimal 7713)
 )
 Now handling font encoding OT1 ...
 ... processing UTF-8 mapping file for font encoding OT1
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu
 File: ot1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc
    defining Unicode char U+00A0 (decimal 160)
    defining Unicode char U+00A1 (decimal 161)
    defining Unicode char U+00A3 (decimal 163)
    defining Unicode char U+00AD (decimal 173)
    defining Unicode char U+00B8 (decimal 184)
    defining Unicode char U+00BF (decimal 191)
    defining Unicode char U+00C5 (decimal 197)
    defining Unicode char U+00C6 (decimal 198)
    defining Unicode char U+00D8 (decimal 216)
    defining Unicode char U+00DF (decimal 223)
    defining Unicode char U+00E6 (decimal 230)
    defining Unicode char U+00EC (decimal 236)
    defining Unicode char U+00ED (decimal 237)
    defining Unicode char U+00EE (decimal 238)
    defining Unicode char U+00EF (decimal 239)
    defining Unicode char U+00F8 (decimal 248)
    defining Unicode char U+0131 (decimal 305)
    defining Unicode char U+0141 (decimal 321)
    defining Unicode char U+0142 (decimal 322)
    defining Unicode char U+0152 (decimal 338)
    defining Unicode char U+0153 (decimal 339)
    defining Unicode char U+0174 (decimal 372)
    defining Unicode char U+0175 (decimal 373)
    defining Unicode char U+0176 (decimal 374)
    defining Unicode char U+0177 (decimal 375)
    defining Unicode char U+0218 (decimal 536)
    defining Unicode char U+0219 (decimal 537)
    defining Unicode char U+021A (decimal 538)
    defining Unicode char U+021B (decimal 539)
    defining Unicode char U+2013 (decimal 8211)
    defining Unicode char U+2014 (decimal 8212)
    defining Unicode char U+2018 (decimal 8216)
    defining Unicode char U+2019 (decimal 8217)
    defining Unicode char U+201C (decimal 8220)
    defining Unicode char U+201D (decimal 8221)
 )
 Now handling font encoding OMS ...
 ... processing UTF-8 mapping file for font encoding OMS
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu
 File: omsenc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc
    defining Unicode char U+00A7 (decimal 167)
    defining Unicode char U+00B6 (decimal 182)
    defining Unicode char U+00B7 (decimal 183)
    defining Unicode char U+2020 (decimal 8224)
    defining Unicode char U+2021 (decimal 8225)
    defining Unicode char U+2022 (decimal 8226)
 )
 Now handling font encoding OMX ...
 ... no UTF-8 mapping file for font encoding OMX
 Now handling font encoding U ...
 ... no UTF-8 mapping file for font encoding U
    defining Unicode char U+00A9 (decimal 169)
    defining Unicode char U+00AA (decimal 170)
    defining Unicode char U+00AE (decimal 174)
    defining Unicode char U+00BA (decimal 186)
    defining Unicode char U+02C6 (decimal 710)
    defining Unicode char U+02DC (decimal 732)
    defining Unicode char U+200C (decimal 8204)
    defining Unicode char U+2026 (decimal 8230)
    defining Unicode char U+2122 (decimal 8482)
    defining Unicode char U+2423 (decimal 9251)
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/natbib/natbib.sty
 Package: natbib 2010/09/13 8.31b (PWD, AO)
 \bibhang=\skip43
 \bibsep=\skip44
 LaTeX Info: Redefining \cite on input line 694.
 \c@NAT@ctr=\count88
 )
 (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.sty
 Package: babel 2018/02/14 3.18 The Babel package
 
 (/usr/share/texlive/texmf-dist/tex/generic/babel/switch.def
 File: switch.def 2018/02/14 3.18 Babel switching mechanism
 )
 (/usr/share/texlive/texmf-dist/tex/generic/babel-french/french.ldf
 Language: french 2018/02/04 v3.4b French support from the babel system
 
 (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.def
 File: babel.def 2018/02/14 3.18 Babel common definitions
 \babel@savecnt=\count89
 \U@D=\dimen103
 
 (/usr/share/texlive/texmf-dist/tex/generic/babel/txtbabel.def)
 \bbl@dirlevel=\count90
 )
 \l@acadian = a dialect from \language\l@french 
 \FB@nonchar=\count91
 Package babel Info: Making : an active character on input line 411.
 Package babel Info: Making ; an active character on input line 412.
 Package babel Info: Making ! an active character on input line 413.
 Package babel Info: Making ? an active character on input line 414.
 \FBguill@level=\count92
 \FB@everypar=\toks16
 \FB@Mht=\dimen104
 \mc@charclass=\count93
 \mc@charfam=\count94
 \mc@charslot=\count95
 \std@mcc=\count96
 \dec@mcc=\count97
 \c@FBcaption@count=\count98
 \listindentFB=\skip45
 \descindentFB=\skip46
 \labelwidthFB=\skip47
 \leftmarginFB=\skip48
 \parindentFFN=\dimen105
 \FBfnindent=\skip49
 )
 (/usr/share/texlive/texmf-dist/tex/generic/babel-german/german.ldf
 Language: german 2016/11/02 v2.9 German support for babel (traditional orthogra
 phy)
 
 (/usr/share/texlive/texmf-dist/tex/generic/babel-german/germanb.ldf
 Language: germanb 2016/11/02 v2.9 German support for babel (traditional orthogr
 aphy)
 Package babel Info: Making " an active character on input line 139.
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/babel-english/english.ldf
 Language: english 2017/06/06 v3.3r English support from the babel system
 \l@canadian = a dialect from \language\l@american 
 \l@australian = a dialect from \language\l@british 
 \l@newzealand = a dialect from \language\l@british 
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/carlisle/scalefnt.sty)
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty
 Package: keyval 2014/10/28 v1.15 key=value parser (DPC)
 \KV@toks@=\toks17
 )
 (/usr/share/texmf/tex/latex/lm/lmodern.sty
 Package: lmodern 2009/10/30 v1.6 Latin Modern Fonts
 LaTeX Font Info:    Overwriting symbol font `operators' in version `normal'
 (Font)                  OT1/cmr/m/n --> OT1/lmr/m/n on input line 22.
 LaTeX Font Info:    Overwriting symbol font `letters' in version `normal'
 (Font)                  OML/cmm/m/it --> OML/lmm/m/it on input line 23.
 LaTeX Font Info:    Overwriting symbol font `symbols' in version `normal'
 (Font)                  OMS/cmsy/m/n --> OMS/lmsy/m/n on input line 24.
 LaTeX Font Info:    Overwriting symbol font `largesymbols' in version `normal'
 (Font)                  OMX/cmex/m/n --> OMX/lmex/m/n on input line 25.
 LaTeX Font Info:    Overwriting symbol font `operators' in version `bold'
 (Font)                  OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 26.
 LaTeX Font Info:    Overwriting symbol font `letters' in version `bold'
 (Font)                  OML/cmm/b/it --> OML/lmm/b/it on input line 27.
 LaTeX Font Info:    Overwriting symbol font `symbols' in version `bold'
 (Font)                  OMS/cmsy/b/n --> OMS/lmsy/b/n on input line 28.
 LaTeX Font Info:    Overwriting symbol font `largesymbols' in version `bold'
 (Font)                  OMX/cmex/m/n --> OMX/lmex/m/n on input line 29.
 LaTeX Font Info:    Overwriting math alphabet `\mathbf' in version `normal'
 (Font)                  OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 31.
 LaTeX Font Info:    Overwriting math alphabet `\mathsf' in version `normal'
 (Font)                  OT1/cmss/m/n --> OT1/lmss/m/n on input line 32.
 LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `normal'
 (Font)                  OT1/cmr/m/it --> OT1/lmr/m/it on input line 33.
 LaTeX Font Info:    Overwriting math alphabet `\mathtt' in version `normal'
 (Font)                  OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 34.
 LaTeX Font Info:    Overwriting math alphabet `\mathbf' in version `bold'
 (Font)                  OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 35.
 LaTeX Font Info:    Overwriting math alphabet `\mathsf' in version `bold'
 (Font)                  OT1/cmss/bx/n --> OT1/lmss/bx/n on input line 36.
 LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `bold'
 (Font)                  OT1/cmr/bx/it --> OT1/lmr/bx/it on input line 37.
 LaTeX Font Info:    Overwriting math alphabet `\mathtt' in version `bold'
 (Font)                  OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 38.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier.sty
 Package: fourier 2005/01/01 1.4 fourier-GUTenberg package
 Now handling font encoding FML ...
 ... no UTF-8 mapping file for font encoding FML
 Now handling font encoding FMS ...
 ... no UTF-8 mapping file for font encoding FMS
 Now handling font encoding FMX ...
 ... no UTF-8 mapping file for font encoding FMX
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty
 Package: fontenc 2017/04/05 v2.0i Standard LaTeX package
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def
 File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file
 LaTeX Font Info:    Redeclaring font encoding T1 on input line 48.
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty
 Package: textcomp 2017/04/05 v2.0i Standard LaTeX package
 Package textcomp Info: Sub-encoding information:
 (textcomp)               5 = only ISO-Adobe without \textcurrency
 (textcomp)               4 = 5 + \texteuro
 (textcomp)               3 = 4 + \textohm
 (textcomp)               2 = 3 + \textestimated + \textcurrency
 (textcomp)               1 = TS1 - \textcircled - \t
 (textcomp)               0 = TS1 (full)
 (textcomp)             Font families with sub-encoding setting implement
 (textcomp)             only a restricted character set as indicated.
 (textcomp)             Family '?' is the default used for unknown fonts.
 (textcomp)             See the documentation for details.
 Package textcomp Info: Setting ? sub-encoding to TS1/1 on input line 79.
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def
 File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file
 Now handling font encoding TS1 ...
 ... processing UTF-8 mapping file for font encoding TS1
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu
 File: ts1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc
    defining Unicode char U+00A2 (decimal 162)
    defining Unicode char U+00A3 (decimal 163)
    defining Unicode char U+00A4 (decimal 164)
    defining Unicode char U+00A5 (decimal 165)
    defining Unicode char U+00A6 (decimal 166)
    defining Unicode char U+00A7 (decimal 167)
    defining Unicode char U+00A8 (decimal 168)
    defining Unicode char U+00A9 (decimal 169)
    defining Unicode char U+00AA (decimal 170)
    defining Unicode char U+00AC (decimal 172)
    defining Unicode char U+00AE (decimal 174)
    defining Unicode char U+00AF (decimal 175)
    defining Unicode char U+00B0 (decimal 176)
    defining Unicode char U+00B1 (decimal 177)
    defining Unicode char U+00B2 (decimal 178)
    defining Unicode char U+00B3 (decimal 179)
    defining Unicode char U+00B4 (decimal 180)
    defining Unicode char U+00B5 (decimal 181)
    defining Unicode char U+00B6 (decimal 182)
    defining Unicode char U+00B7 (decimal 183)
    defining Unicode char U+00B9 (decimal 185)
    defining Unicode char U+00BA (decimal 186)
    defining Unicode char U+00BC (decimal 188)
    defining Unicode char U+00BD (decimal 189)
    defining Unicode char U+00BE (decimal 190)
    defining Unicode char U+00D7 (decimal 215)
    defining Unicode char U+00F7 (decimal 247)
    defining Unicode char U+0192 (decimal 402)
    defining Unicode char U+02C7 (decimal 711)
    defining Unicode char U+02D8 (decimal 728)
    defining Unicode char U+02DD (decimal 733)
    defining Unicode char U+0E3F (decimal 3647)
    defining Unicode char U+2016 (decimal 8214)
    defining Unicode char U+2020 (decimal 8224)
    defining Unicode char U+2021 (decimal 8225)
    defining Unicode char U+2022 (decimal 8226)
    defining Unicode char U+2030 (decimal 8240)
    defining Unicode char U+2031 (decimal 8241)
    defining Unicode char U+203B (decimal 8251)
    defining Unicode char U+203D (decimal 8253)
    defining Unicode char U+2044 (decimal 8260)
    defining Unicode char U+204E (decimal 8270)
    defining Unicode char U+2052 (decimal 8274)
    defining Unicode char U+20A1 (decimal 8353)
    defining Unicode char U+20A4 (decimal 8356)
    defining Unicode char U+20A6 (decimal 8358)
    defining Unicode char U+20A9 (decimal 8361)
    defining Unicode char U+20AB (decimal 8363)
    defining Unicode char U+20AC (decimal 8364)
    defining Unicode char U+20B1 (decimal 8369)
    defining Unicode char U+2103 (decimal 8451)
    defining Unicode char U+2116 (decimal 8470)
    defining Unicode char U+2117 (decimal 8471)
    defining Unicode char U+211E (decimal 8478)
    defining Unicode char U+2120 (decimal 8480)
    defining Unicode char U+2122 (decimal 8482)
    defining Unicode char U+2126 (decimal 8486)
    defining Unicode char U+2127 (decimal 8487)
    defining Unicode char U+212E (decimal 8494)
    defining Unicode char U+2190 (decimal 8592)
    defining Unicode char U+2191 (decimal 8593)
    defining Unicode char U+2192 (decimal 8594)
    defining Unicode char U+2193 (decimal 8595)
    defining Unicode char U+2329 (decimal 9001)
    defining Unicode char U+232A (decimal 9002)
    defining Unicode char U+2422 (decimal 9250)
    defining Unicode char U+25E6 (decimal 9702)
    defining Unicode char U+25EF (decimal 9711)
    defining Unicode char U+266A (decimal 9834)
 ))
 LaTeX Info: Redefining \oldstylenums on input line 334.
 Package textcomp Info: Setting cmr sub-encoding to TS1/0 on input line 349.
 Package textcomp Info: Setting cmss sub-encoding to TS1/0 on input line 350.
 Package textcomp Info: Setting cmtt sub-encoding to TS1/0 on input line 351.
 Package textcomp Info: Setting cmvtt sub-encoding to TS1/0 on input line 352.
 Package textcomp Info: Setting cmbr sub-encoding to TS1/0 on input line 353.
 Package textcomp Info: Setting cmtl sub-encoding to TS1/0 on input line 354.
 Package textcomp Info: Setting ccr sub-encoding to TS1/0 on input line 355.
 Package textcomp Info: Setting ptm sub-encoding to TS1/4 on input line 356.
 Package textcomp Info: Setting pcr sub-encoding to TS1/4 on input line 357.
 Package textcomp Info: Setting phv sub-encoding to TS1/4 on input line 358.
 Package textcomp Info: Setting ppl sub-encoding to TS1/3 on input line 359.
 Package textcomp Info: Setting pag sub-encoding to TS1/4 on input line 360.
 Package textcomp Info: Setting pbk sub-encoding to TS1/4 on input line 361.
 Package textcomp Info: Setting pnc sub-encoding to TS1/4 on input line 362.
 Package textcomp Info: Setting pzc sub-encoding to TS1/4 on input line 363.
 Package textcomp Info: Setting bch sub-encoding to TS1/4 on input line 364.
 Package textcomp Info: Setting put sub-encoding to TS1/5 on input line 365.
 Package textcomp Info: Setting uag sub-encoding to TS1/5 on input line 366.
 Package textcomp Info: Setting ugq sub-encoding to TS1/5 on input line 367.
 Package textcomp Info: Setting ul8 sub-encoding to TS1/4 on input line 368.
 Package textcomp Info: Setting ul9 sub-encoding to TS1/4 on input line 369.
 Package textcomp Info: Setting augie sub-encoding to TS1/5 on input line 370.
 Package textcomp Info: Setting dayrom sub-encoding to TS1/3 on input line 371.
 Package textcomp Info: Setting dayroms sub-encoding to TS1/3 on input line 372.
 
 Package textcomp Info: Setting pxr sub-encoding to TS1/0 on input line 373.
 Package textcomp Info: Setting pxss sub-encoding to TS1/0 on input line 374.
 Package textcomp Info: Setting pxtt sub-encoding to TS1/0 on input line 375.
 Package textcomp Info: Setting txr sub-encoding to TS1/0 on input line 376.
 Package textcomp Info: Setting txss sub-encoding to TS1/0 on input line 377.
 Package textcomp Info: Setting txtt sub-encoding to TS1/0 on input line 378.
 Package textcomp Info: Setting lmr sub-encoding to TS1/0 on input line 379.
 Package textcomp Info: Setting lmdh sub-encoding to TS1/0 on input line 380.
 Package textcomp Info: Setting lmss sub-encoding to TS1/0 on input line 381.
 Package textcomp Info: Setting lmssq sub-encoding to TS1/0 on input line 382.
 Package textcomp Info: Setting lmvtt sub-encoding to TS1/0 on input line 383.
 Package textcomp Info: Setting lmtt sub-encoding to TS1/0 on input line 384.
 Package textcomp Info: Setting qhv sub-encoding to TS1/0 on input line 385.
 Package textcomp Info: Setting qag sub-encoding to TS1/0 on input line 386.
 Package textcomp Info: Setting qbk sub-encoding to TS1/0 on input line 387.
 Package textcomp Info: Setting qcr sub-encoding to TS1/0 on input line 388.
 Package textcomp Info: Setting qcs sub-encoding to TS1/0 on input line 389.
 Package textcomp Info: Setting qpl sub-encoding to TS1/0 on input line 390.
 Package textcomp Info: Setting qtm sub-encoding to TS1/0 on input line 391.
 Package textcomp Info: Setting qzc sub-encoding to TS1/0 on input line 392.
 Package textcomp Info: Setting qhvc sub-encoding to TS1/0 on input line 393.
 Package textcomp Info: Setting futs sub-encoding to TS1/4 on input line 394.
 Package textcomp Info: Setting futx sub-encoding to TS1/4 on input line 395.
 Package textcomp Info: Setting futj sub-encoding to TS1/4 on input line 396.
 Package textcomp Info: Setting hlh sub-encoding to TS1/3 on input line 397.
 Package textcomp Info: Setting hls sub-encoding to TS1/3 on input line 398.
 Package textcomp Info: Setting hlst sub-encoding to TS1/3 on input line 399.
 Package textcomp Info: Setting hlct sub-encoding to TS1/5 on input line 400.
 Package textcomp Info: Setting hlx sub-encoding to TS1/5 on input line 401.
 Package textcomp Info: Setting hlce sub-encoding to TS1/5 on input line 402.
 Package textcomp Info: Setting hlcn sub-encoding to TS1/5 on input line 403.
 Package textcomp Info: Setting hlcw sub-encoding to TS1/5 on input line 404.
 Package textcomp Info: Setting hlcf sub-encoding to TS1/5 on input line 405.
 Package textcomp Info: Setting pplx sub-encoding to TS1/3 on input line 406.
 Package textcomp Info: Setting pplj sub-encoding to TS1/3 on input line 407.
 Package textcomp Info: Setting ptmx sub-encoding to TS1/4 on input line 408.
 Package textcomp Info: Setting ptmj sub-encoding to TS1/4 on input line 409.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier-orns.sty
 Package: fourier-orns 2004/01/30 1.1 fourier-ornaments package
 )
 LaTeX Font Info:    Redeclaring symbol font `operators' on input line 50.
 LaTeX Font Info:    Encoding `OT1' has changed to `T1' for symbol font
 (Font)              `operators' in the math version `normal' on input line 50.
 LaTeX Font Info:    Overwriting symbol font `operators' in version `normal'
 (Font)                  OT1/lmr/m/n --> T1/futs/m/n on input line 50.
 LaTeX Font Info:    Encoding `OT1' has changed to `T1' for symbol font
 (Font)              `operators' in the math version `bold' on input line 50.
 LaTeX Font Info:    Overwriting symbol font `operators' in version `bold'
 (Font)                  OT1/lmr/bx/n --> T1/futs/m/n on input line 50.
 LaTeX Font Info:    Overwriting symbol font `operators' in version `bold'
 (Font)                  T1/futs/m/n --> T1/futs/b/n on input line 51.
 LaTeX Font Info:    Redeclaring symbol font `letters' on input line 59.
 LaTeX Font Info:    Encoding `OML' has changed to `FML' for symbol font
 (Font)              `letters' in the math version `normal' on input line 59.
 LaTeX Font Info:    Overwriting symbol font `letters' in version `normal'
 (Font)                  OML/lmm/m/it --> FML/futmi/m/it on input line 59.
 LaTeX Font Info:    Encoding `OML' has changed to `FML' for symbol font
 (Font)              `letters' in the math version `bold' on input line 59.
 LaTeX Font Info:    Overwriting symbol font `letters' in version `bold'
 (Font)                  OML/lmm/b/it --> FML/futmi/m/it on input line 59.
 \symotherletters=\mathgroup4
 LaTeX Font Info:    Overwriting symbol font `letters' in version `bold'
 (Font)                  FML/futmi/m/it --> FML/futmi/b/it on input line 61.
 LaTeX Font Info:    Overwriting symbol font `otherletters' in version `bold'
 (Font)                  FML/futm/m/it --> FML/futm/b/it on input line 62.
 LaTeX Font Info:    Redeclaring math symbol \Gamma on input line 63.
 LaTeX Font Info:    Redeclaring math symbol \Delta on input line 64.
 LaTeX Font Info:    Redeclaring math symbol \Theta on input line 65.
 LaTeX Font Info:    Redeclaring math symbol \Lambda on input line 66.
 LaTeX Font Info:    Redeclaring math symbol \Xi on input line 67.
 LaTeX Font Info:    Redeclaring math symbol \Pi on input line 68.
 LaTeX Font Info:    Redeclaring math symbol \Sigma on input line 69.
 LaTeX Font Info:    Redeclaring math symbol \Upsilon on input line 70.
 LaTeX Font Info:    Redeclaring math symbol \Phi on input line 71.
 LaTeX Font Info:    Redeclaring math symbol \Psi on input line 72.
 LaTeX Font Info:    Redeclaring math symbol \Omega on input line 73.
 LaTeX Font Info:    Redeclaring symbol font `symbols' on input line 113.
 LaTeX Font Info:    Encoding `OMS' has changed to `FMS' for symbol font
 (Font)              `symbols' in the math version `normal' on input line 113.
 LaTeX Font Info:    Overwriting symbol font `symbols' in version `normal'
 (Font)                  OMS/lmsy/m/n --> FMS/futm/m/n on input line 113.
 LaTeX Font Info:    Encoding `OMS' has changed to `FMS' for symbol font
 (Font)              `symbols' in the math version `bold' on input line 113.
 LaTeX Font Info:    Overwriting symbol font `symbols' in version `bold'
 (Font)                  OMS/lmsy/b/n --> FMS/futm/m/n on input line 113.
 LaTeX Font Info:    Redeclaring symbol font `largesymbols' on input line 114.
 LaTeX Font Info:    Encoding `OMX' has changed to `FMX' for symbol font
 (Font)              `largesymbols' in the math version `normal' on input line 1
 14.
 LaTeX Font Info:    Overwriting symbol font `largesymbols' in version `normal'
 (Font)                  OMX/lmex/m/n --> FMX/futm/m/n on input line 114.
 LaTeX Font Info:    Encoding `OMX' has changed to `FMX' for symbol font
 (Font)              `largesymbols' in the math version `bold' on input line 114
 .
 LaTeX Font Info:    Overwriting symbol font `largesymbols' in version `bold'
 (Font)                  OMX/lmex/m/n --> FMX/futm/m/n on input line 114.
 LaTeX Font Info:    Redeclaring math alphabet \mathbf on input line 115.
 LaTeX Font Info:    Overwriting math alphabet `\mathbf' in version `normal'
 (Font)                  OT1/lmr/bx/n --> T1/futs/bx/n on input line 115.
 LaTeX Font Info:    Overwriting math alphabet `\mathbf' in version `bold'
 (Font)                  OT1/lmr/bx/n --> T1/futs/bx/n on input line 115.
 LaTeX Font Info:    Redeclaring math alphabet \mathrm on input line 116.
 LaTeX Font Info:    Redeclaring math alphabet \mathit on input line 117.
 LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `normal'
 (Font)                  OT1/lmr/m/it --> T1/futs/m/it on input line 117.
 LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `bold'
 (Font)                  OT1/lmr/bx/it --> T1/futs/m/it on input line 117.
 LaTeX Font Info:    Redeclaring math alphabet \mathcal on input line 118.
 LaTeX Font Info:    Redeclaring math symbol \parallel on input line 134.
 LaTeX Font Info:    Redeclaring math symbol \hbar on input line 148.
 LaTeX Font Info:    Redeclaring math symbol \varkappa on input line 186.
 LaTeX Font Info:    Redeclaring math symbol \varvarrho on input line 187.
 LaTeX Font Info:    Redeclaring math delimiter \Vert on input line 210.
 LaTeX Font Info:    Redeclaring math delimiter \vert on input line 215.
 LaTeX Font Info:    Redeclaring math delimiter \Downarrow on input line 225.
 LaTeX Font Info:    Redeclaring math delimiter \backslash on input line 227.
 LaTeX Font Info:    Redeclaring math delimiter \rangle on input line 229.
 LaTeX Font Info:    Redeclaring math delimiter \langle on input line 231.
 LaTeX Font Info:    Redeclaring math delimiter \rbrace on input line 233.
 LaTeX Font Info:    Redeclaring math delimiter \lbrace on input line 235.
 LaTeX Font Info:    Redeclaring math delimiter \rceil on input line 237.
 LaTeX Font Info:    Redeclaring math delimiter \lceil on input line 239.
 LaTeX Font Info:    Redeclaring math delimiter \rfloor on input line 241.
 LaTeX Font Info:    Redeclaring math delimiter \lfloor on input line 243.
 LaTeX Font Info:    Redeclaring math accent \acute on input line 247.
 LaTeX Font Info:    Redeclaring math accent \grave on input line 248.
 LaTeX Font Info:    Redeclaring math accent \ddot on input line 249.
 LaTeX Font Info:    Redeclaring math accent \tilde on input line 250.
 LaTeX Font Info:    Redeclaring math accent \bar on input line 251.
 LaTeX Font Info:    Redeclaring math accent \breve on input line 252.
 LaTeX Font Info:    Redeclaring math accent \check on input line 253.
 LaTeX Font Info:    Redeclaring math accent \hat on input line 254.
 LaTeX Font Info:    Redeclaring math accent \dot on input line 255.
 LaTeX Font Info:    Redeclaring math accent \mathring on input line 256.
 \symUfutm=\mathgroup5
 )
 (/usr/share/texlive/texmf-dist/tex/latex/setspace/setspace.sty
 Package: setspace 2011/12/19 v6.7a set line spacing
 )
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphicx.sty
 Package: graphicx 2017/06/01 v1.1a Enhanced LaTeX Graphics (DPC,SPQR)
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphics.sty
 Package: graphics 2017/06/25 v1.2c Standard LaTeX Graphics (DPC,SPQR)
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/trig.sty
 Package: trig 2016/01/03 v1.10 sin cos tan (DPC)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
 File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
 )
 Package graphics Info: Driver file: pdftex.def on input line 99.
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics-def/pdftex.def
 File: pdftex.def 2018/01/08 v1.0l Graphics/color driver for pdftex
 ))
 \Gin@req@height=\dimen106
 \Gin@req@width=\dimen107
 )
 (/usr/share/texlive/texmf-dist/tex/latex/xcolor/xcolor.sty
 Package: xcolor 2016/05/11 v2.12 LaTeX color extensions (UK)
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/color.cfg
 File: color.cfg 2016/01/02 v1.6 sample color configuration
 )
 Package xcolor Info: Driver file: pdftex.def on input line 225.
 Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1348.
 Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1352.
 Package xcolor Info: Model `RGB' extended on input line 1364.
 Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1366.
 Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1367.
 Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1368.
 Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1369.
 Package xcolor Info: Model `Gray' substituted by `gray' on input line 1370.
 Package xcolor Info: Model `wave' substituted by `hsb' on input line 1371.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/subfig/subfig.sty
 Package: subfig 2005/06/28 ver: 1.3 subfig package
 
 (/usr/share/texlive/texmf-dist/tex/latex/caption/caption.sty
 Package: caption 2016/02/21 v3.3-144 Customizing captions (AR)
 
 (/usr/share/texlive/texmf-dist/tex/latex/caption/caption3.sty
 Package: caption3 2016/05/22 v1.7-166 caption3 kernel (AR)
 Package caption3 Info: TeX engine: e-TeX on input line 67.
 \captionmargin=\dimen108
 \captionmargin@=\dimen109
 \captionwidth=\dimen110
 \caption@tempdima=\dimen111
 \caption@indent=\dimen112
 \caption@parindent=\dimen113
 \caption@hangindent=\dimen114
 )
 \c@ContinuedFloat=\count99
 )
 \c@KVtest=\count100
 \sf@farskip=\skip50
 \sf@captopadj=\dimen115
 \sf@capskip=\skip51
 \sf@nearskip=\skip52
 \c@subfigure=\count101
 \c@subfigure@save=\count102
 \c@lofdepth=\count103
 \c@subtable=\count104
 \c@subtable@save=\count105
 \c@lotdepth=\count106
 \sf@top=\skip53
 \sf@bottom=\skip54
 )
 (/usr/share/texlive/texmf-dist/tex/latex/booktabs/booktabs.sty
 Package: booktabs 2016/04/27 v1.618033 publication quality tables
 \heavyrulewidth=\dimen116
 \lightrulewidth=\dimen117
 \cmidrulewidth=\dimen118
 \belowrulesep=\dimen119
 \belowbottomsep=\dimen120
 \aboverulesep=\dimen121
 \abovetopsep=\dimen122
 \cmidrulesep=\dimen123
 \cmidrulekern=\dimen124
 \defaultaddspace=\dimen125
 \@cmidla=\count107
 \@cmidlb=\count108
 \@aboverulesep=\dimen126
 \@belowrulesep=\dimen127
 \@thisruleclass=\count109
 \@lastruleclass=\count110
 \@thisrulewidth=\dimen128
 )
 (/usr/share/texlive/texmf-dist/tex/latex/lipsum/lipsum.sty
 Package: lipsum 2014/07/27 v1.3 150 paragraphs of Lorem Ipsum dummy text
 \c@lips@count=\count111
 )
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.sty
 Package: microtype 2018/01/14 v2.7a Micro-typographical refinements (RS)
 \MT@toks=\toks18
 \MT@count=\count112
 LaTeX Info: Redefining \textls on input line 793.
 \MT@outer@kern=\dimen129
 LaTeX Info: Redefining \textmicrotypecontext on input line 1339.
 \MT@listname@count=\count113
 
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype-pdftex.def
 File: microtype-pdftex.def 2018/01/14 v2.7a Definitions specific to pdftex (RS)
 
 LaTeX Info: Redefining \lsstyle on input line 913.
 LaTeX Info: Redefining \lslig on input line 913.
 \MT@outer@space=\skip55
 )
 Package microtype Info: Loading configuration file microtype.cfg.
 
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.cfg
 File: microtype.cfg 2018/01/14 v2.7a microtype main configuration file (RS)
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/url/url.sty
 \Urlmuskip=\muskip10
 Package: url 2013/09/16  ver 3.4  Verb mode for urls, etc.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty
 Package: fancyhdr 2017/06/30 v3.9a Extensive control of page headers and footer
 s
 \f@nch@headwidth=\skip56
 \f@nch@O@elh=\skip57
 \f@nch@O@erh=\skip58
 \f@nch@O@olh=\skip59
 \f@nch@O@orh=\skip60
 \f@nch@O@elf=\skip61
 \f@nch@O@erf=\skip62
 \f@nch@O@olf=\skip63
 \f@nch@O@orf=\skip64
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.sty
 \lst@mode=\count114
 \lst@gtempboxa=\box26
 \lst@token=\toks19
 \lst@length=\count115
 \lst@currlwidth=\dimen130
 \lst@column=\count116
 \lst@pos=\count117
 \lst@lostspace=\dimen131
 \lst@width=\dimen132
 \lst@newlines=\count118
 \lst@lineno=\count119
 \lst@maxwidth=\dimen133
 
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty
 File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz)
 \c@lstnumber=\count120
 \lst@skipnumbers=\count121
 \lst@framebox=\box27
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.cfg
 File: listings.cfg 2015/06/04 1.6 listings configuration
 ))
 Package: listings 2015/06/04 1.6 (Carsten Heinz)
 
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty
 File: lstlang1.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty
 File: lstlang2.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty
 File: lstlang3.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty
 File: lstlang1.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty
 File: lstlang2.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty
 File: lstlang3.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty
 File: lstlang1.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty
 File: lstlang2.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty
 File: lstlang3.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty
 File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hyperref.sty
 Package: hyperref 2018/02/06 v6.86b Hypertext links for LaTeX
 
 (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-hyperref.sty
 Package: hobsub-hyperref 2016/05/16 v1.14 Bundle oberdiek, subset hyperref (HO)
 
 
 (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-generic.sty
 Package: hobsub-generic 2016/05/16 v1.14 Bundle oberdiek, subset generic (HO)
 Package: hobsub 2016/05/16 v1.14 Construct package bundles (HO)
 Package: infwarerr 2016/05/16 v1.4 Providing info/warning/error messages (HO)
 Package: ltxcmds 2016/05/16 v1.23 LaTeX kernel commands for general use (HO)
 Package: ifluatex 2016/05/16 v1.4 Provides the ifluatex switch (HO)
 Package ifluatex Info: LuaTeX not detected.
 Package: ifvtex 2016/05/16 v1.6 Detect VTeX and its facilities (HO)
 Package ifvtex Info: VTeX not detected.
 Package: intcalc 2016/05/16 v1.2 Expandable calculations with integers (HO)
 Package: ifpdf 2017/03/15 v3.2 Provides the ifpdf switch
 Package: etexcmds 2016/05/16 v1.6 Avoid name clashes with e-TeX commands (HO)
 Package etexcmds Info: Could not find \expanded.
 (etexcmds)             That can mean that you are not using pdfTeX 1.50 or
 (etexcmds)             that some package has redefined \expanded.
 (etexcmds)             In the latter case, load this package earlier.
 Package: kvsetkeys 2016/05/16 v1.17 Key value parser (HO)
 Package: kvdefinekeys 2016/05/16 v1.4 Define keys (HO)
 Package: pdftexcmds 2018/01/21 v0.26 Utility functions of pdfTeX for LuaTeX (HO
 )
 Package pdftexcmds Info: LuaTeX not detected.
 Package pdftexcmds Info: \pdf@primitive is available.
 Package pdftexcmds Info: \pdf@ifprimitive is available.
 Package pdftexcmds Info: \pdfdraftmode found.
 Package: pdfescape 2016/05/16 v1.14 Implements pdfTeX's escape features (HO)
 Package: bigintcalc 2016/05/16 v1.4 Expandable calculations on big integers (HO
 )
 Package: bitset 2016/05/16 v1.2 Handle bit-vector datatype (HO)
 Package: uniquecounter 2016/05/16 v1.3 Provide unlimited unique counter (HO)
 )
 Package hobsub Info: Skipping package `hobsub' (already loaded).
 Package: letltxmacro 2016/05/16 v1.5 Let assignment for LaTeX macros (HO)
 Package: hopatch 2016/05/16 v1.3 Wrapper for package hooks (HO)
 Package: xcolor-patch 2016/05/16 xcolor patch
 Package: atveryend 2016/05/16 v1.9 Hooks at the very end of document (HO)
 Package atveryend Info: \enddocument detected (standard20110627).
 Package: atbegshi 2016/06/09 v1.18 At begin shipout hook (HO)
 Package: refcount 2016/05/16 v3.5 Data extraction from label references (HO)
 Package: hycolor 2016/05/16 v1.8 Color options for hyperref/bookmark (HO)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty
 Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional
 )
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/auxhook.sty
 Package: auxhook 2016/05/16 v1.4 Hooks for auxiliary files (HO)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/kvoptions.sty
 Package: kvoptions 2016/05/16 v3.12 Key value format for package options (HO)
 )
 \@linkdim=\dimen134
 \Hy@linkcounter=\count122
 \Hy@pagecounter=\count123
 
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/pd1enc.def
 File: pd1enc.def 2018/02/06 v6.86b Hyperref: PDFDocEncoding definition (HO)
 Now handling font encoding PD1 ...
 ... no UTF-8 mapping file for font encoding PD1
 )
 \Hy@SavedSpaceFactor=\count124
 
 (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/hyperref.cfg
 File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive
 )
 Package hyperref Info: Hyper figures OFF on input line 4509.
 Package hyperref Info: Link nesting OFF on input line 4514.
 Package hyperref Info: Hyper index ON on input line 4517.
 Package hyperref Info: Plain pages OFF on input line 4524.
 Package hyperref Info: Backreferencing OFF on input line 4529.
 Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
 Package hyperref Info: Bookmarks ON on input line 4762.
 \c@Hy@tempcnt=\count125
 LaTeX Info: Redefining \url on input line 5115.
 \XeTeXLinkMargin=\dimen135
 \Fld@menulength=\count126
 \Field@Width=\dimen136
 \Fld@charsize=\dimen137
 Package hyperref Info: Hyper figures OFF on input line 6369.
 Package hyperref Info: Link nesting OFF on input line 6374.
 Package hyperref Info: Hyper index ON on input line 6377.
 Package hyperref Info: backreferencing OFF on input line 6384.
 Package hyperref Info: Link coloring OFF on input line 6389.
 Package hyperref Info: Link coloring with OCG OFF on input line 6394.
 Package hyperref Info: PDF/A mode OFF on input line 6399.
 LaTeX Info: Redefining \ref on input line 6439.
 LaTeX Info: Redefining \pageref on input line 6443.
 \Hy@abspage=\count127
 \c@Item=\count128
 \c@Hfootnote=\count129
 )
 Package hyperref Info: Driver (autodetected): hpdftex.
 
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hpdftex.def
 File: hpdftex.def 2018/02/06 v6.86b Hyperref driver for pdfTeX
 \Fld@listcount=\count130
 \c@bookmark@seq@number=\count131
 
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty
 Package: rerunfilecheck 2016/05/16 v1.8 Rerun checks for auxiliary files (HO)
 Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2
 82.
 )
 \Hy@SectionHShift=\skip65
 )
 Package hyperref Info: Option `colorlinks' set `true' on input line 105.
 
 (/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pdfpages.sty
 Package: pdfpages 2017/10/31 v0.5l Insert pages of external PDF documents (AM)
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ifthen.sty
 Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/tools/calc.sty
 Package: calc 2014/10/28 v4.3 Infix arithmetic (KKT,FJ)
 \calc@Acount=\count132
 \calc@Bcount=\count133
 \calc@Adimen=\dimen138
 \calc@Bdimen=\dimen139
 \calc@Askip=\skip66
 \calc@Bskip=\skip67
 LaTeX Info: Redefining \setlength on input line 80.
 LaTeX Info: Redefining \addtolength on input line 81.
 \calc@Ccount=\count134
 \calc@Cskip=\skip68
 )
 (/usr/share/texlive/texmf-dist/tex/latex/eso-pic/eso-pic.sty
 Package: eso-pic 2015/07/21 v2.0g eso-pic (RN)
 )
 \AM@pagewidth=\dimen140
 \AM@pageheight=\dimen141
 
 (/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pppdftex.def
 File: pppdftex.def 2017/10/31 v0.5l Pdfpages driver for pdfTeX (AM)
 )
 \AM@pagebox=\box28
 \AM@global@opts=\toks20
 \AM@toc@title=\toks21
 \c@AM@survey=\count135
 \AM@templatesizebox=\box29
 )
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bookmark.sty
 Package: bookmark 2016/05/17 v1.26 PDF bookmarks (HO)
 
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bkm-pdftex.def
 File: bkm-pdftex.def 2016/05/17 v1.26 bookmark driver for pdfTeX (HO)
 \BKM@id=\count136
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex
 \pgfutil@everybye=\toks22
 \pgfutil@tempdima=\dimen142
 \pgfutil@tempdimb=\dimen143
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common-lists.t
 ex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def
 \pgfutil@abb=\box30
 (/usr/share/texlive/texmf-dist/tex/latex/ms/everyshi.sty
 Package: everyshi 2001/05/15 v3.00 EveryShipout Package (MS)
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex
 Package: pgfrcs 2015/08/07 v3.0.1a (rcs-revision 1.31)
 ))
 Package: pgf 2015/08/07 v3.0.1a (rcs-revision 1.15)
 
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex
 Package: pgfsys 2014/07/09 v3.0.1a (rcs-revision 1.48)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex
 \pgfkeys@pathtoks=\toks23
 \pgfkeys@temptoks=\toks24
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeysfiltered.code.t
 ex
 \pgfkeys@tmptoks=\toks25
 ))
 \pgf@x=\dimen144
 \pgf@y=\dimen145
 \pgf@xa=\dimen146
 \pgf@ya=\dimen147
 \pgf@xb=\dimen148
 \pgf@yb=\dimen149
 \pgf@xc=\dimen150
 \pgf@yc=\dimen151
 \w@pgf@writea=\write3
 \r@pgf@reada=\read1
 \c@pgf@counta=\count137
 \c@pgf@countb=\count138
 \c@pgf@countc=\count139
 \c@pgf@countd=\count140
 \t@pgf@toka=\toks26
 \t@pgf@tokb=\toks27
 \t@pgf@tokc=\toks28
  (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg
 File: pgf.cfg 2008/05/14  (rcs-revision 1.7)
 )
 Driver file for pgf: pgfsys-pdftex.def
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def
 File: pgfsys-pdftex.def 2014/10/11  (rcs-revision 1.35)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.de
 f
 File: pgfsys-common-pdf.def 2013/10/10  (rcs-revision 1.13)
 )))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.
 tex
 File: pgfsyssoftpath.code.tex 2013/09/09  (rcs-revision 1.9)
 \pgfsyssoftpath@smallbuffer@items=\count141
 \pgfsyssoftpath@bigbuffer@items=\count142
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.
 tex
 File: pgfsysprotocol.code.tex 2006/10/16  (rcs-revision 1.4)
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex
 Package: pgfcore 2010/04/11 v3.0.1a (rcs-revision 1.7)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex
 \pgfmath@dimen=\dimen152
 \pgfmath@count=\count143
 \pgfmath@box=\box31
 \pgfmath@toks=\toks29
 \pgfmath@stack@operand=\toks30
 \pgfmath@stack@operation=\toks31
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code
 .tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonomet
 ric.code.tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.cod
 e.tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison
 .code.tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.
 tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code
 .tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.
 tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerari
 thmetics.code.tex)))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex
 \c@pgfmathroundto@lastzeros=\count144
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.te
 x
 File: pgfcorepoints.code.tex 2013/10/07  (rcs-revision 1.27)
 \pgf@picminx=\dimen153
 \pgf@picmaxx=\dimen154
 \pgf@picminy=\dimen155
 \pgf@picmaxy=\dimen156
 \pgf@pathminx=\dimen157
 \pgf@pathmaxx=\dimen158
 \pgf@pathminy=\dimen159
 \pgf@pathmaxy=\dimen160
 \pgf@xx=\dimen161
 \pgf@xy=\dimen162
 \pgf@yx=\dimen163
 \pgf@yy=\dimen164
 \pgf@zx=\dimen165
 \pgf@zy=\dimen166
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.
 code.tex
 File: pgfcorepathconstruct.code.tex 2013/10/07  (rcs-revision 1.29)
 \pgf@path@lastx=\dimen167
 \pgf@path@lasty=\dimen168
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code
 .tex
 File: pgfcorepathusage.code.tex 2014/11/02  (rcs-revision 1.24)
 \pgf@shorten@end@additional=\dimen169
 \pgf@shorten@start@additional=\dimen170
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.te
 x
 File: pgfcorescopes.code.tex 2015/05/08  (rcs-revision 1.46)
 \pgfpic=\box32
 \pgf@hbox=\box33
 \pgf@layerbox@main=\box34
 \pgf@picture@serial@count=\count145
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.c
 ode.tex
 File: pgfcoregraphicstate.code.tex 2014/11/02  (rcs-revision 1.12)
 \pgflinewidth=\dimen171
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformation
 s.code.tex
 File: pgfcoretransformations.code.tex 2015/08/07  (rcs-revision 1.20)
 \pgf@pt@x=\dimen172
 \pgf@pt@y=\dimen173
 \pgf@pt@temp=\dimen174
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex
 File: pgfcorequick.code.tex 2008/10/09  (rcs-revision 1.3)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.t
 ex
 File: pgfcoreobjects.code.tex 2006/10/11  (rcs-revision 1.2)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing
 .code.tex
 File: pgfcorepathprocessing.code.tex 2013/09/09  (rcs-revision 1.9)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.te
 x
 File: pgfcorearrows.code.tex 2015/05/14  (rcs-revision 1.43)
 \pgfarrowsep=\dimen175
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex
 File: pgfcoreshade.code.tex 2013/07/15  (rcs-revision 1.15)
 \pgf@max=\dimen176
 \pgf@sys@shading@range@num=\count146
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex
 File: pgfcoreimage.code.tex 2013/07/15  (rcs-revision 1.18)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.
 tex
 File: pgfcoreexternal.code.tex 2014/07/09  (rcs-revision 1.21)
 \pgfexternal@startupbox=\box35
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.te
 x
 File: pgfcorelayers.code.tex 2013/07/18  (rcs-revision 1.7)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.c
 ode.tex
 File: pgfcoretransparency.code.tex 2013/09/30  (rcs-revision 1.5)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.
 tex
 File: pgfcorepatterns.code.tex 2013/11/07  (rcs-revision 1.5)
 )))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex
 File: pgfmoduleshapes.code.tex 2014/03/21  (rcs-revision 1.35)
 \pgfnodeparttextbox=\box36
 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex
 File: pgfmoduleplot.code.tex 2015/08/03  (rcs-revision 1.13)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65
 .sty
 Package: pgfcomp-version-0-65 2007/07/03 v3.0.1a (rcs-revision 1.7)
 \pgf@nodesepstart=\dimen177
 \pgf@nodesepend=\dimen178
 )
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18
 .sty
 Package: pgfcomp-version-1-18 2007/07/23 v3.0.1a (rcs-revision 1.1)
 )) (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgffor.sty
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex))
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/math/pgfmath.sty
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex
 Package: pgffor 2013/12/13 v3.0.1a (rcs-revision 1.25)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)
 \pgffor@iter=\dimen179
 \pgffor@skip=\dimen180
 \pgffor@stack=\toks32
 \pgffor@toks=\toks33
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex
 Package: tikz 2015/08/07 v3.0.1a (rcs-revision 1.151)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers
 .code.tex
 File: pgflibraryplothandlers.code.tex 2013/08/31 v3.0.1a (rcs-revision 1.20)
 \pgf@plot@mark@count=\count147
 \pgfplotmarksize=\dimen181
 )
 \tikz@lastx=\dimen182
 \tikz@lasty=\dimen183
 \tikz@lastxsaved=\dimen184
 \tikz@lastysaved=\dimen185
 \tikzleveldistance=\dimen186
 \tikzsiblingdistance=\dimen187
 \tikz@figbox=\box37
 \tikz@figbox@bg=\box38
 \tikz@tempbox=\box39
 \tikz@tempbox@bg=\box40
 \tikztreelevel=\count148
 \tikznumberofchildren=\count149
 \tikznumberofcurrentchild=\count150
 \tikz@fig@count=\count151
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex
 File: pgfmodulematrix.code.tex 2013/09/17  (rcs-revision 1.8)
 \pgfmatrixcurrentrow=\count152
 \pgfmatrixcurrentcolumn=\count153
 \pgf@matrix@numberofcolumns=\count154
 )
 \tikz@expandcount=\count155
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik
 zlibrarytopaths.code.tex
 File: tikzlibrarytopaths.code.tex 2008/06/17 v3.0.1a (rcs-revision 1.2)
 )))
 (/usr/share/texlive/texmf-dist/tex/latex/titlesec/titlesec.sty
 Package: titlesec 2016/03/21 v2.10.2 Sectioning titles
 \ttl@box=\box41
 \beforetitleunit=\skip69
 \aftertitleunit=\skip70
 \ttl@plus=\dimen188
 \ttl@minus=\dimen189
 \ttl@toksa=\toks34
 \titlewidth=\dimen190
 \titlewidthlast=\dimen191
 \titlewidthfirst=\dimen192
 )
 (/usr/share/texlive/texmf-dist/tex/latex/titlesec/ttlkeys.def
 File: ttlkeys.def 2016/03/15
 \c@ttlp@side=\count156
 \ttlp@side=\count157
 )
 \c@myparts=\count158
 
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty
 Package: amsmath 2017/09/02 v2.17a AMS math features
 \@mathmargin=\skip71
 
 For additional information on amsmath, use the `?' option.
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty
 Package: amstext 2000/06/29 v2.01 AMS text
 
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty
 File: amsgen.sty 1999/11/30 v2.0 generic functions
 \@emptytoks=\toks35
 \ex@=\dimen193
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty
 Package: amsbsy 1999/11/29 v1.2d Bold Symbols
 \pmbraise@=\dimen194
 )
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty
 Package: amsopn 2016/03/08 v2.02 operator names
 )
 \inf@bad=\count159
 LaTeX Info: Redefining \frac on input line 213.
 \uproot@=\count160
 \leftroot@=\count161
 LaTeX Info: Redefining \overline on input line 375.
 \classnum@=\count162
 \DOTSCASE@=\count163
 LaTeX Info: Redefining \ldots on input line 472.
 LaTeX Info: Redefining \dots on input line 475.
 LaTeX Info: Redefining \cdots on input line 596.
 \Mathstrutbox@=\box42
 \strutbox@=\box43
 \big@size=\dimen195
 LaTeX Font Info:    Redeclaring font encoding OML on input line 712.
 LaTeX Font Info:    Redeclaring font encoding OMS on input line 713.
 \macc@depth=\count164
 \c@MaxMatrixCols=\count165
 \dotsspace@=\muskip11
 \c@parentequation=\count166
 \dspbrk@lvl=\count167
 \tag@help=\toks36
 \row@=\count168
 \column@=\count169
 \maxfields@=\count170
 \andhelp@=\toks37
 \eqnshift@=\dimen196
 \alignsep@=\dimen197
 \tagshift@=\dimen198
 \tagwidth@=\dimen199
 \totwidth@=\dimen256
 \lineht@=\dimen257
 \@envbody=\toks38
 \multlinegap=\skip72
 \multlinetaggap=\skip73
 \mathdisplay@stack=\toks39
 LaTeX Info: Redefining \[ on input line 2817.
 LaTeX Info: Redefining \] on input line 2818.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amsfonts.sty
 Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
 \symAMSa=\mathgroup6
 \symAMSb=\mathgroup7
 LaTeX Font Info:    Overwriting math alphabet `\mathfrak' in version `bold'
 (Font)                  U/euf/m/n --> U/euf/b/n on input line 106.
 LaTeX Font Info:    Redeclaring math symbol \square on input line 141.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amssymb.sty
 Package: amssymb 2013/01/14 v3.01 AMS font symbols
 LaTeX Font Info:    Redeclaring math symbol \blacksquare on input line 48.
 LaTeX Font Info:    Redeclaring math symbol \vDash on input line 60.
 LaTeX Font Info:    Redeclaring math symbol \leftleftarrows on input line 63.
 LaTeX Font Info:    Redeclaring math symbol \rightrightarrows on input line 64.
 
 LaTeX Font Info:    Redeclaring math symbol \leqslant on input line 101.
 LaTeX Font Info:    Redeclaring math symbol \geqslant on input line 108.
 LaTeX Font Info:    Redeclaring math symbol \blacktriangleright on input line 1
 20.
 LaTeX Font Info:    Redeclaring math symbol \blacktriangleleft on input line 12
 1.
 LaTeX Font Info:    Redeclaring math symbol \complement on input line 165.
 LaTeX Font Info:    Redeclaring math symbol \intercal on input line 166.
 LaTeX Font Info:    Redeclaring math symbol \nleqslant on input line 181.
 LaTeX Font Info:    Redeclaring math symbol \ngeqslant on input line 182.
 LaTeX Font Info:    Redeclaring math symbol \varsubsetneq on input line 203.
 LaTeX Font Info:    Redeclaring math symbol \subsetneqq on input line 207.
 LaTeX Font Info:    Redeclaring math symbol \nparallel on input line 215.
 LaTeX Font Info:    Redeclaring math symbol \nvDash on input line 221.
 LaTeX Font Info:    Redeclaring math symbol \nexists on input line 235.
 LaTeX Font Info:    Redeclaring math symbol \smallsetminus on input line 251.
 LaTeX Font Info:    Redeclaring math symbol \curvearrowleft on input line 257.
 LaTeX Font Info:    Redeclaring math symbol \curvearrowright on input line 258.
 
 LaTeX Font Info:    Redeclaring math symbol \varkappa on input line 260.
 LaTeX Font Info:    Redeclaring math symbol \hslash on input line 262.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mathtools.sty
 Package: mathtools 2018/01/08 v1.21 mathematical typesetting tools
 
 (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mhsetup.sty
 Package: mhsetup 2017/03/31 v1.3 programming setup (MH)
 )
 LaTeX Info: Thecontrolsequence`\('isalreadyrobust on input line 129.
 LaTeX Info: Thecontrolsequence`\)'isalreadyrobust on input line 129.
 LaTeX Info: Thecontrolsequence`\['isalreadyrobust on input line 129.
 LaTeX Info: Thecontrolsequence`\]'isalreadyrobust on input line 129.
 \g_MT_multlinerow_int=\count171
 \l_MT_multwidth_dim=\dimen258
 \origjot=\skip74
 \l_MT_shortvdotswithinadjustabove_dim=\dimen259
 \l_MT_shortvdotswithinadjustbelow_dim=\dimen260
 \l_MT_above_intertext_sep=\dimen261
 \l_MT_below_intertext_sep=\dimen262
 \l_MT_above_shortintertext_sep=\dimen263
 \l_MT_below_shortintertext_sep=\dimen264
 ))
 (./head/settings_custom.tex
 (/usr/share/texlive/texmf-dist/tex/latex/algorithm2e/algorithm2e.sty
 Package: algorithm2e 2017/07/18 v5.2 algorithms environments
 \c@AlgoLine=\count172
 \algocf@hangindent=\skip75
 
 (/usr/share/texlive/texmf-dist/tex/latex/ifoddpage/ifoddpage.sty
 Package: ifoddpage 2016/04/23 v1.1 Conditionals for odd/even page detection
 \c@checkoddpage=\count173
 )
 (/usr/share/texlive/texmf-dist/tex/latex/tools/xspace.sty
 Package: xspace 2014/10/28 v1.13 Space after command names (DPC,MH)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/relsize/relsize.sty
 Package: relsize 2013/03/29 ver 4.1
 )
 \skiptotal=\skip76
 \skiplinenumber=\skip77
 \skiprule=\skip78
 \skiphlne=\skip79
 \skiptext=\skip80
 \skiplength=\skip81
 \algomargin=\skip82
 \skipalgocfslide=\skip83
 \algowidth=\dimen265
 \inoutsize=\dimen266
 \inoutindent=\dimen267
 \interspacetitleruled=\dimen268
 \interspacealgoruled=\dimen269
 \interspacetitleboxruled=\dimen270
 \algocf@ruledwidth=\skip84
 \algocf@inoutbox=\box44
 \algocf@inputbox=\box45
 \AlCapSkip=\skip85
 \AlCapHSkip=\skip86
 \algoskipindent=\skip87
 \algocf@nlbox=\box46
 \algocf@hangingbox=\box47
 \algocf@untilbox=\box48
 \algocf@skipuntil=\skip88
 \algocf@capbox=\box49
 \algocf@lcaptionbox=\skip89
 \algoheightruledefault=\skip90
 \algoheightrule=\skip91
 \algotitleheightruledefault=\skip92
 \algotitleheightrule=\skip93
 \c@algocfline=\count174
 \c@algocfproc=\count175
 \c@algocf=\count176
 \algocf@algoframe=\box50
 \algocf@algobox=\box51
 )
 (/usr/share/texlive/texmf-dist/tex/latex/float/float.sty
 Package: float 2001/11/08 v1.3d Float enhancements (AL)
 \c@float@type=\count177
 \float@exts=\toks40
 \float@box=\box52
 \@float@everytoks=\toks41
 \@floatcapt=\box53
 )
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/xr-hyper.sty
 Package: xr-hyper 2000/03/22 v6.00beta4 eXternal References (DPC)
 
 
 Package xr-hyper Warning: Load package `hyperref' after `xr-hyper'.
 
 ) (/usr/share/texlive/texmf-dist/tex/latex/makecell/makecell.sty
 Package: makecell 2009/08/03 V0.1e Managing of Tab Column Heads and Cells
 
 (/usr/share/texlive/texmf-dist/tex/latex/tools/array.sty
 Package: array 2016/10/06 v2.4d Tabular extension package (FMi)
 \col@sep=\dimen271
 \extrarowheight=\dimen272
 \NC@list=\toks42
 \extratabsurround=\skip94
 \backup@length=\skip95
 )
 \rotheadsize=\dimen273
 \c@nlinenum=\count178
 \TeXr@lab=\toks43
 )
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.sty
 (/usr/share/texlive/texmf-dist/tex/latex/l3kernel/expl3.sty
 Package: expl3 2018/02/21 L3 programming layer (loader) 
 
 (/usr/share/texlive/texmf-dist/tex/latex/l3kernel/expl3-code.tex
 Package: expl3 2018/02/21 L3 programming layer (code)
 \c_max_int=\count179
 \l_tmpa_int=\count180
 \l_tmpb_int=\count181
 \g_tmpa_int=\count182
 \g_tmpb_int=\count183
 \g__intarray_font_int=\count184
 \g__prg_map_int=\count185
 \c_log_iow=\count186
 \l_iow_line_count_int=\count187
 \l__iow_line_target_int=\count188
 \l__iow_one_indent_int=\count189
 \l__iow_indent_int=\count190
 \c_zero_dim=\dimen274
 \c_max_dim=\dimen275
 \l_tmpa_dim=\dimen276
 \l_tmpb_dim=\dimen277
 \g_tmpa_dim=\dimen278
 \g_tmpb_dim=\dimen279
 \c_zero_skip=\skip96
 \c_max_skip=\skip97
 \l_tmpa_skip=\skip98
 \l_tmpb_skip=\skip99
 \g_tmpa_skip=\skip100
 \g_tmpb_skip=\skip101
 \c_zero_muskip=\muskip12
 \c_max_muskip=\muskip13
 \l_tmpa_muskip=\muskip14
 \l_tmpb_muskip=\muskip15
 \g_tmpa_muskip=\muskip16
 \g_tmpb_muskip=\muskip17
 \l_keys_choice_int=\count191
 \c__fp_leading_shift_int=\count192
 \c__fp_middle_shift_int=\count193
 \c__fp_trailing_shift_int=\count194
 \c__fp_big_leading_shift_int=\count195
 \c__fp_big_middle_shift_int=\count196
 \c__fp_big_trailing_shift_int=\count197
 \c__fp_Bigg_leading_shift_int=\count198
 \c__fp_Bigg_middle_shift_int=\count199
 \c__fp_Bigg_trailing_shift_int=\count266
 \c__fp_rand_size_int=\count267
 \c__fp_rand_four_int=\count268
 \c__fp_rand_eight_int=\count269
 \l__sort_length_int=\count270
 \l__sort_min_int=\count271
 \l__sort_top_int=\count272
 \l__sort_max_int=\count273
 \l__sort_true_max_int=\count274
 \l__sort_block_int=\count275
 \l__sort_begin_int=\count276
 \l__sort_end_int=\count277
 \l__sort_A_int=\count278
 \l__sort_B_int=\count279
 \l__sort_C_int=\count280
 \l__tl_build_start_index_int=\count281
 \l__tl_build_index_int=\count282
 \l__tl_analysis_normal_int=\count283
 \l__tl_analysis_index_int=\count284
 \l__tl_analysis_nesting_int=\count285
 \l__tl_analysis_type_int=\count286
 \l__regex_internal_a_int=\count287
 \l__regex_internal_b_int=\count288
 \l__regex_internal_c_int=\count289
 \l__regex_balance_int=\count290
 \l__regex_group_level_int=\count291
 \l__regex_mode_int=\count292
 \c__regex_cs_in_class_mode_int=\count293
 \c__regex_cs_mode_int=\count294
 \l__regex_catcodes_int=\count295
 \l__regex_default_catcodes_int=\count296
 \c__regex_catcode_D_int=\count297
 \c__regex_catcode_S_int=\count298
 \c__regex_catcode_L_int=\count299
 \c__regex_catcode_O_int=\count300
 \c__regex_catcode_A_int=\count301
 \c__regex_all_catcodes_int=\count302
 \l__regex_show_lines_int=\count303
 \l__regex_min_state_int=\count304
 \l__regex_max_state_int=\count305
 \l__regex_left_state_int=\count306
 \l__regex_right_state_int=\count307
 \l__regex_capturing_group_int=\count308
 \l__regex_min_pos_int=\count309
 \l__regex_max_pos_int=\count310
 \l__regex_curr_pos_int=\count311
 \l__regex_start_pos_int=\count312
 \l__regex_success_pos_int=\count313
 \l__regex_curr_char_int=\count314
 \l__regex_curr_catcode_int=\count315
 \l__regex_last_char_int=\count316
 \l__regex_case_changed_char_int=\count317
 \l__regex_curr_state_int=\count318
 \l__regex_step_int=\count319
 \l__regex_min_active_int=\count320
 \l__regex_max_active_int=\count321
 \l__regex_replacement_csnames_int=\count322
 \l__regex_match_count_int=\count323
 \l__regex_min_submatch_int=\count324
 \l__regex_submatch_int=\count325
 \l__regex_zeroth_submatch_int=\count326
 \g__regex_trace_regex_int=\count327
 \c_empty_box=\box54
 \l_tmpa_box=\box55
 \l_tmpb_box=\box56
 \g_tmpa_box=\box57
 \g_tmpb_box=\box58
 \l__box_top_dim=\dimen280
 \l__box_bottom_dim=\dimen281
 \l__box_left_dim=\dimen282
 \l__box_right_dim=\dimen283
 \l__box_top_new_dim=\dimen284
 \l__box_bottom_new_dim=\dimen285
 \l__box_left_new_dim=\dimen286
 \l__box_right_new_dim=\dimen287
 \l__box_internal_box=\box59
 \l__coffin_internal_box=\box60
 \l__coffin_internal_dim=\dimen288
 \l__coffin_offset_x_dim=\dimen289
 \l__coffin_offset_y_dim=\dimen290
 \l__coffin_x_dim=\dimen291
 \l__coffin_y_dim=\dimen292
 \l__coffin_x_prime_dim=\dimen293
 \l__coffin_y_prime_dim=\dimen294
 \c_empty_coffin=\box61
 \l__coffin_aligned_coffin=\box62
 \l__coffin_aligned_internal_coffin=\box63
 \l_tmpa_coffin=\box64
 \l_tmpb_coffin=\box65
 \l__coffin_display_coffin=\box66
 \l__coffin_display_coord_coffin=\box67
 \l__coffin_display_pole_coffin=\box68
 \l__coffin_display_offset_dim=\dimen295
 \l__coffin_display_x_dim=\dimen296
 \l__coffin_display_y_dim=\dimen297
 \l__coffin_bounding_shift_dim=\dimen298
 \l__coffin_left_corner_dim=\dimen299
 \l__coffin_right_corner_dim=\dimen300
 \l__coffin_bottom_corner_dim=\dimen301
 \l__coffin_top_corner_dim=\dimen302
 \l__coffin_scaled_total_height_dim=\dimen303
 \l__coffin_scaled_width_dim=\dimen304
 )
 (/usr/share/texlive/texmf-dist/tex/latex/l3kernel/l3pdfmode.def
 File: l3pdfmode.def 2017/03/18 v L3 Experimental driver: PDF mode
 \l__driver_color_stack_int=\count328
 \l__driver_tmp_box=\box69
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/l3packages/xparse/xparse.sty
 Package: xparse 2018/02/21 L3 Experimental document command parser
 \l__xparse_current_arg_int=\count329
 \g__xparse_grabber_int=\count330
 \l__xparse_m_args_int=\count331
 \l__xparse_mandatory_args_int=\count332
 \l__xparse_v_nesting_int=\count333
 )
 (/usr/share/texlive/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty
 Package: l3keys2e 2018/02/21 LaTeX2e option processing using LaTeX3 keys
 )
 Package: chemmacros 2017/08/28 v5.8b comprehensive support for typesetting chem
 istry documents (CN)
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \IfChemCompatibilityTF with sig. 'mm+m+m' on line 190.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \IfChemCompatibilityT with sig. 'mm+m' on line 193.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \IfChemCompatibilityF with sig. 'mm+m' on line 196.
 .................................................
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros5.sty
 Package: chemmacros5 2017/08/28 v5.8b comprehensive support for typesetting che
 mistry documents (CN)
 \l__chemmacros_tmpa_dim=\dimen305
 \l__chemmacros_tmpb_dim=\dimen306
 \l__chemmacros_tmpc_dim=\dimen307
 \l__chemmacros_tmpa_int=\count334
 \l__chemmacros_tmpb_int=\count335
 \l__chemmacros_tmpc_int=\count336
 \l__chemmacros_tmpa_box=\box70
 \l__chemmacros_tmpb_box=\box71
 \l__chemmacros_tmpc_box=\box72
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ChemModule with sig. 'smmO{5.0}' on line 258.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \usechemmodule with sig. 'm' on line 262.
 .................................................
 \g__file_internal_ior=\read2
 (chemmacros)    Loading module `base'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.base.code
 .tex
 File: chemmacros.module.base.code.tex 2017/08/28 v5.8b chemmacros module `base'
  2017/08/28 basic chemmacros module
  (/usr/share/texlive/texmf-dist/tex/latex/etoolbox/etoolbox.sty
 Package: etoolbox 2018/02/11 v2.5e e-TeX tools for LaTeX (JAW)
 \etb@tempcnta=\count337
 )
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemDeprecated with sig. 'mm' on line 53.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemMacroset with sig. 'smmm' on line 151.
 .................................................
 
 (/usr/share/texlive/texmf-dist/tex/latex/koma-script/scrlfile.sty
 Package: scrlfile 2017/09/07 v3.24 KOMA-Script package (loading files)
 )
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ChemCleverefSupport with sig. 'mmomo' on line 356.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ChemFancyrefSupport with sig. 'mmo' on line 356.
 .................................................
 
 (/usr/share/texlive/texmf-dist/tex/latex/tools/bm.sty
 Package: bm 2017/01/16 v1.2c Bold Symbol Support (DPC/FMi)
 \symboldoperators=\mathgroup8
 \symboldletters=\mathgroup9
 \symboldotherletters=\mathgroup10
 LaTeX Font Info:    Redeclaring math alphabet \mathbf on input line 141.
 LaTeX Info: Redefining \bm on input line 207.
 )
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \chemsetup with sig. 'om' on line 428.
 .................................................
 (chemmacros)    Loading module `errorcheck'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.errorchec
 k.code.tex
 File: chemmacros.module.errorcheck.code.tex 2017/08/28 v5.8b chemmacros module 
 `errorcheck' 2016/10/05 error checking for unloaded modules
 ))
 (chemmacros)    Loading module `lang'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.lang.code
 .tex
 File: chemmacros.module.lang.code.tex 2017/08/28 v5.8b chemmacros module `lang'
  2016/05/31 language settings for chemmacros
  (/usr/share/texlive/texmf-dist/tex/latex/translations/translations.sty
 Package: translations 2017/08/31 v1.7a internationalization of LaTeX2e packages
  (CN)
 )
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ChemTranslate with sig. 'm' on line 68.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemTranslations with sig. 'mm' on line 140.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemTranslation with sig. 'mmm' on line 144.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ForAllChemTranslationsDo with sig. '+m' on line 162.
 .................................................
 )
 (chemmacros)    Loading module `greek'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.greek.cod
 e.tex
 File: chemmacros.module.greek.code.tex 2017/08/28 v5.8b chemmacros module `gree
 k' 2015/06/09 upright greek symbols
  (/usr/share/texlive/texmf-dist/tex/latex/chemgreek/chemgreek.sty
 Package: chemgreek 2016/12/20 v1.1 interfaceforuprightgreeklettersforuseinchemi
 stry (CN)
 \l__chemgreek_tmpa_int=\count338
 \g__chemgreek_tmpa_int=\count339
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \newchemgreekmapping with sig. 'O{}mm' on line 336.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \renewchemgreekmapping with sig. 'O{}mm' on line 339.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \declarechemgreekmapping with sig. 'O{}mm' on line 342.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \newchemgreekmappingalias with sig. 'mm' on line 347.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \renewchemgreekmappingalias with sig. 'mm' on line 350.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \declarechemgreekmappingalias with sig. 'mm' on line 353.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \changechemgreeksymbol with sig. 'mmmm' on line 383.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \chemgreekmappingsymbol with sig. 'mm' on line 477.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \activatechemgreekmapping with sig. 'sm' on line 486.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \selectchemgreekmapping with sig. 'm' on line 491.
 .................................................
 ))
 (chemmacros)    Loading module `chemformula'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.chemformu
 la.code.tex
 File: chemmacros.module.chemformula.code.tex 2017/08/28 v5.8b chemmacros module
  `chemformula' 2016/05/03 integration of chemical formulas
 (chemmacros)    Loading module `charges'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.charges.c
 ode.tex
 File: chemmacros.module.charges.code.tex 2017/08/28 v5.8b chemmacros module `ch
 arges' 2015/07/30 charges
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemCharge with sig. 'mm' on line 122.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemCharge with sig. 'mm' on line 122.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemCharge with sig. 'mm' on line 122.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemCharge with sig. 'mm' on line 122.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemPartialCharge with sig. 'mm' on line 125.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemPartialCharge with sig. 'mm' on line 125.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemPartialCharge with sig. 'mm' on line 125.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemPartialCharge with sig. 'mm' on line 125.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \mch with sig. 'o' on line 146.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \pch with sig. 'o' on line 147.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \fmch with sig. 'o' on line 148.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \fpch with sig. 'o' on line 149.
 .................................................
 ))
 (chemmacros)    Loading module `acid-base'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.acid-base
 .code.tex
 File: chemmacros.module.acid-base.code.tex 2017/08/28 v5.8b chemmacros module `
 acid-base' 2016/05/31 acid/base
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemEqConstant with sig. 'mmm' on line 87.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemEqConstant with sig. 'mmm' on line 87.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemEqConstant with sig. 'mmm' on line 87.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemEqConstant with sig. 'mmm' on line 87.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \p with sig. 'm' on line 119.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \pH with sig. '' on line 120.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \pOH with sig. '' on line 121.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \pKa with sig. 'o' on line 130.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \pKb with sig. 'o' on line 139.
 .................................................
 )
 (chemmacros)    Loading module `symbols'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.symbols.c
 ode.tex
 File: chemmacros.module.symbols.code.tex 2017/08/28 v5.8b chemmacros module `sy
 mbols' 2015/06/09 symbols
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \standardstate with sig. '' on line 67.
 .................................................
 )
 (chemmacros)    Loading module `particles'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.particles
 .code.tex
 File: chemmacros.module.particles.code.tex 2017/08/28 v5.8b chemmacros module `
 particles' 2016/04/02 particles
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemParticle with sig. 'mm' on line 45.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemParticle with sig. 'mm' on line 45.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemParticle with sig. 'mm' on line 45.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemParticle with sig. 'mm' on line 45.
 .................................................
 \l__chemmacros_nucleophile_dim=\dimen308
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemNucleophile with sig. 'mm' on line 111.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemNucleophile with sig. 'mm' on line 111.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemNucleophile with sig. 'mm' on line 111.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemNucleophile with sig. 'mm' on line 111.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \Nuc with sig. 'o' on line 130.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ba with sig. 'o' on line 131.
 .................................................
 )
 (chemmacros)    Loading module `phases'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.phases.co
 de.tex
 File: chemmacros.module.phases.code.tex 2017/08/28 v5.8b chemmacros module `pha
 ses' 2016/05/31 phase descriptors
 \l__chemmacros_phases_space_dim=\dimen309
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemPhase with sig. 'mm' on line 45.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemPhase with sig. 'mm' on line 45.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemPhase with sig. 'mm' on line 45.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemPhase with sig. 'mm' on line 45.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \phase with sig. 'm' on line 93.
 .................................................
 .................................................
 . LaTeX info: "xparse/redefine-command"
 . 
 . Redefining command \sld with sig. 'o' on line 95.
 .................................................
 .................................................
 . LaTeX info: "xparse/redefine-command"
 . 
 . Redefining command \lqd with sig. 'o' on line 96.
 .................................................
 .................................................
 . LaTeX info: "xparse/redefine-command"
 . 
 . Redefining command \gas with sig. 'o' on line 97.
 .................................................
 .................................................
 . LaTeX info: "xparse/redefine-command"
 . 
 . Redefining command \aq with sig. 'o' on line 98.
 .................................................
 )
 (chemmacros)    Loading module `nomenclature'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.nomenclat
 ure.code.tex
 File: chemmacros.module.nomenclature.code.tex 2017/08/28 v5.8b chemmacros modul
 e `nomenclature' 2017/06/11 chemical names
 (chemmacros)    Loading module `tikz'...
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.tikz.code
 .tex
 File: chemmacros.module.tikz.code.tex 2017/08/28 v5.8b chemmacros module `tikz'
  2015/10/26 upright greek symbols
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik
 zlibrarycalc.code.tex
 File: tikzlibrarycalc.code.tex 2013/07/15 v3.0.1a (rcs-revision 1.9)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik
 zlibrarydecorations.pathmorphing.code.tex
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik
 zlibrarydecorations.code.tex
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduledecorations.cod
 e.tex
 \pgfdecoratedcompleteddistance=\dimen310
 \pgfdecoratedremainingdistance=\dimen311
 \pgfdecoratedinputsegmentcompleteddistance=\dimen312
 \pgfdecoratedinputsegmentremainingdistance=\dimen313
 \pgf@decorate@distancetomove=\dimen314
 \pgf@decorate@repeatstate=\count340
 \pgfdecorationsegmentamplitude=\dimen315
 \pgfdecorationsegmentlength=\dimen316
 )
 \tikz@lib@dec@box=\box73
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/libraries/decorations/pgflibrary
 decorations.pathmorphing.code.tex))
 \l__chemmacros_el_length_dim=\dimen317
 )
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemIUPAC with sig. 'mm' on line 209.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemIUPAC with sig. 'mm' on line 212.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemIUPAC with sig. 'mm' on line 215.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemIUPAC with sig. 'mm' on line 218.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \LetChemIUPAC with sig. 'mm' on line 221.
 .................................................
 \l__chemmacros_cip_kern_dim=\dimen318
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \Sconf with sig. 'O{S}' on line 349.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \Rconf with sig. 'O{R}' on line 350.
 .................................................
 \l__chemmacros_iupac_hyphen_pre_dim=\dimen319
 \l__chemmacros_iupac_hyphen_post_dim=\dimen320
 \l__chemmacros_iupac_break_dim=\dimen321
 \l__chemmacros_iupac_break_skip=\skip102
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemIUPACShorthand with sig. 'mm' on line 604.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemIUPACShorthand with sig. 'mm' on line 611.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemIUPACShorthand with sig. 'mm' on line 617.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemIUPACShorthand with sig. 'mm' on line 624.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RemoveChemIUPACShorthand with sig. 'm' on line 627.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \iupac with sig. 'O{}m' on line 673.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemLatin with sig. 'mm' on line 755.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemLatin with sig. 'mm' on line 755.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemLatin with sig. 'mm' on line 755.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemLatin with sig. 'mm' on line 755.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \latin with sig. 'O{}m' on line 826.
 .................................................
 ))))
 .................................................
 . chemmacros info: "default-formula-method"
 . 
 . You haven't chosen a formula method so I'm assuming the default method
 . `chemformula'.
 .................................................
 
 (/usr/share/texlive/texmf-dist/tex/latex/chemformula/chemformula.sty
 (/usr/share/texlive/texmf-dist/tex/latex/l3packages/xfrac/xfrac.sty
 (/usr/share/texlive/texmf-dist/tex/latex/l3packages/xtemplate/xtemplate.sty
 Package: xtemplate 2018/02/21 L3 Experimental prototype document functions
 \l__xtemplate_tmp_dim=\dimen322
 \l__xtemplate_tmp_int=\count341
 \l__xtemplate_tmp_muskip=\muskip18
 \l__xtemplate_tmp_skip=\skip103
 )
 Package: xfrac 2018/02/21 L3 Experimental split-level fractions
 \l__xfrac_slash_box=\box74
 \l__xfrac_tmp_box=\box75
 \l__xfrac_denominator_bot_sep_dim=\dimen323
 \l__xfrac_numerator_bot_sep_dim=\dimen324
 \l__xfrac_numerator_top_sep_dim=\dimen325
 \l__xfrac_slash_left_sep_dim=\dimen326
 \l__xfrac_slash_right_sep_dim=\dimen327
 \l__xfrac_slash_left_muskip=\muskip19
 \l__xfrac_slash_right_muskip=\muskip20
 .................................................
 . xtemplate info: "declare-object-type"
 . 
 . Declaring object type 'xfrac' taking 3 argument(s) on line 80.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \sfrac with sig. 'omom' on line 420.
 .................................................
 )
 (/usr/share/texlive/texmf-dist/tex/latex/units/nicefrac.sty
 Package: nicefrac 1998/08/04 v0.9b Nice fractions
 \L@UnitsRaiseDisplaystyle=\skip104
 \L@UnitsRaiseTextstyle=\skip105
 \L@UnitsRaiseScriptstyle=\skip106
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/libraries/pgflibraryarrows.meta.
 code.tex
 File: pgflibraryarrows.meta.code.tex 2015/05/13 v3.0.1a (rcs-revision 1.13)
 \pgfarrowinset=\dimen328
 \pgfarrowlength=\dimen329
 \pgfarrowwidth=\dimen330
 \pgfarrowlinewidth=\dimen331
 )
 Package: chemformula 2017/03/23 v4.15e typeset chemical compounds and reactions
  (CN)
 \l__chemformula_tmpa_dim=\dimen332
 \l__chemformula_tmpb_dim=\dimen333
 \l__chemformula_tmpc_dim=\dimen334
 \l__chemformula_tmpa_int=\count342
 \l__chemformula_tmpb_int=\count343
 \l__chemformula_tmpc_int=\count344
 \l__chemformula_tmpa_box=\box76
 \l__chemformula_tmpb_box=\box77
 \l__chemformula_arrow_length_dim=\dimen335
 \l__chemformula_arrow_label_height_dim=\dimen336
 \l__chemformula_arrow_label_offset_dim=\dimen337
 \l__chemformula_arrow_minimum_length_dim=\dimen338
 \l__chemformula_arrow_shortage_dim=\dimen339
 \l__chemformula_arrow_offset_dim=\dimen340
 \l__chemformula_arrow_yshift_dim=\dimen341
 \l__chemformula_radical_radius_dim=\dimen342
 \l__chemformula_radical_hshift_dim=\dimen343
 \l__chemformula_radical_vshift_dim=\dimen344
 \l__chemformula_radical_space_dim=\dimen345
 \l__chemformula_arrow_head_dim=\dimen346
 \l__chemformula_name_dim=\dimen347
 \l__chemformula_adduct_space_dim=\dimen348
 \l__chemformula_charge_shift_dim=\dimen349
 \l__chemformula_subscript_shift_dim=\dimen350
 \l__chemformula_superscript_shift_dim=\dimen351
 \l__chemformula_subscript_dim=\dimen352
 \l__chemformula_superscript_dim=\dimen353
 \l__chemformula_bond_dim=\dimen354
 \l__chemformula_bond_space_dim=\dimen355
 \l__chemformula_elspec_pair_distance_dim=\dimen356
 \l__chemformula_elspec_pair_line_length_dim=\dimen357
 \l__chemformula_elspec_pair_width_dim=\dimen358
 \l__chemformula_kroegervink_positive_radius_dim=\dimen359
 \l__chemformula_kroegervink_positive_hshift_dim=\dimen360
 \l__chemformula_kroegervink_positive_vshift_dim=\dimen361
 \l__chemformula_kroegervink_positive_space_dim=\dimen362
 \l__chemformula_stoich_space_skip=\skip107
 \l__chemformula_math_space_skip=\skip108
 \l__chemformula_count_tokens_int=\count345
 \g__chemformula_lewis_int=\count346
 \l__chemformula_arrow_arg_i_box=\box78
 \l__chemformula_arrow_arg_ii_box=\box79
 \l__chemformula_superscript_box=\box80
 \l__chemformula_subscript_box=\box81
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \charrow with sig. 'mO{}O{}' on line 823.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemArrow with sig. 'mm' on line 896.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemArrow with sig. 'mm' on line 904.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemArrow with sig. 'mm' on line 911.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemArrow with sig. 'mm' on line 921.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ShowChemArrow with sig. 'm' on line 931.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ch with sig. 'O{}m' on line 1176.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \chcpd with sig. 'O{}m' on line 1198.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \chname with sig. 'R(){}R(){}' on line 1276.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemCompoundProperty with sig. 'mm' on line 1361.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemCompoundProperty with sig. 'mm' on line 1364.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemCompoundProperty with sig. 'mm' on line 1367.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemCompoundProperty with sig. 'mm' on line 1370.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RemoveChemCompoundProperty with sig. 'm' on line 1373.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemBond with sig. 'mm' on line 1571.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemBond with sig. 'mm' on line 1574.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemBond with sig. 'mm' on line 1577.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemBond with sig. 'mm' on line 1580.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemBondAlias with sig. 'mm' on line 1583.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemBondAlias with sig. 'mm' on line 1586.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ShowChemBond with sig. 'm' on line 1589.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \bond with sig. 'm' on line 1592.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \chstoich with sig. 'm' on line 2191.
 .................................................
 \l__chemformula_additions_symbol_space_skip=\skip109
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemAdditionSymbol with sig. 'mmm' on line 2697.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemAdditionSymbol with sig. 'mmm' on line 2706.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemAdditionSymbol with sig. 'mmm' on line 2715.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemAdditionSymbol with sig. 'mmm' on line 2718.
 .................................................
 \l__chemformula_plus_space_skip=\skip110
 \l__chemformula_minus_space_skip=\skip111
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \NewChemSymbol with sig. 'mm' on line 2763.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \ProvideChemSymbol with sig. 'mm' on line 2769.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \RenewChemSymbol with sig. 'mm' on line 2776.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \DeclareChemSymbol with sig. 'mm' on line 2779.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \chlewis with sig. 'O{}mm' on line 3334.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \setchemformula with sig. 'm' on line 3339.
 .................................................
 ) (./my_thesis.aux (./head/dedication.aux)
 (./head/acknowledgements.aux) (./head/abstracts.aux)
 (./main/ch_introduction.aux) (./main/ch_lab_resources.aux)
 (./main/ch_encode_peaks.aux) (./main/ch_spark.aux) (./main/ch_smile-seq.aux)
 (./main/ch_pwmscan.aux) (./main/ch_atac-seq.aux
 
 LaTeX Warning: Label `encode_peaks_algo_ndr_extend' multiply defined.
 
 ) (./main/ch_discussion.aux) (./tail/appendix.aux) (./tail/biblio.aux)
 (./tail/cv.aux))
 \openout1 = `my_thesis.aux'.
 
 LaTeX Font Info:    Checking defaults for OML/cmm/m/it on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for T1/cmr/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for OT1/cmr/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for OMS/cmsy/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for OMX/cmex/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for U/cmr/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for FML/futm/m/it on input line 18.
 LaTeX Font Info:    Try loading font information for FML+futm on input line 18.
 
  (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutm.fd
 File: fmlfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futm.
 )
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for FMS/futm/m/n on input line 18.
 LaTeX Font Info:    Try loading font information for FMS+futm on input line 18.
 
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmsfutm.fd
 File: fmsfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FMS/futm.
 )
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for FMX/futm/m/n on input line 18.
 LaTeX Font Info:    Try loading font information for FMX+futm on input line 18.
 
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmxfutm.fd
 File: fmxfutm.fd futm-extension
 )
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for TS1/cmr/m/n on input line 18.
 LaTeX Font Info:    Try loading font information for TS1+cmr on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd
 File: ts1cmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions
 )
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for PD1/pdf/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Try loading font information for T1+futs on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/t1futs.fd
 File: t1futs.fd 2004/03/02 Fontinst v1.926 font definitions for T1/futs.
 )
 LaTeX Info: Redefining \degres on input line 18.
 LaTeX Info: Redefining \dots on input line 18.
 LaTeX Info: Redefining \up on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
 [Loading MPS to PDF converter (version 2006.09.02).]
 \scratchcounter=\count347
 \scratchdimen=\dimen363
 \scratchbox=\box82
 \nofMPsegments=\count348
 \nofMParguments=\count349
 \everyMPshowfont=\toks44
 \MPscratchCnt=\count350
 \MPscratchDim=\dimen364
 \MPnumerator=\count351
 \makeMPintoPDFobject=\count352
 \everyMPtoPDFconversion=\toks45
 ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty
 Package: epstopdf-base 2016/05/15 v2.6 Base part for package epstopdf
 
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/grfext.sty
 Package: grfext 2016/05/16 v1.2 Manage graphics extensions (HO)
 )
 Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4
 38.
 Package grfext Info: Graphics extension search list:
 (grfext)             [.pdf,.png,.jpg,.mps,.jpeg,.jbig2,.jb2,.PDF,.PNG,.JPG,.JPE
 G,.JBIG2,.JB2,.eps]
 (grfext)             \AppendGraphicsExtensions on input line 456.
 
 (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
 File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv
 e
 ))
 Package caption Info: Begin \AtBeginDocument code.
 Package caption Info: subfig package v1.3 is loaded.
 Package caption Info: float package is loaded.
 Package caption Info: hyperref package is loaded.
 Package caption Info: listings package is loaded.
 Package caption Info: End \AtBeginDocument code.
 LaTeX Info: Redefining \microtypecontext on input line 18.
 Package microtype Info: Generating PDF output.
 Package microtype Info: Character protrusion enabled (level 2).
 Package microtype Info: Using default protrusion set `alltext'.
 Package microtype Info: Automatic font expansion enabled (level 2),
 (microtype)             stretch: 20, shrink: 20, step: 1, non-selected.
 Package microtype Info: Using default expansion set `basictext'.
 Package microtype Info: No adjustment of tracking.
 Package microtype Info: No adjustment of interword spacing.
 Package microtype Info: No adjustment of character kerning.
 Package microtype Info: Loading generic protrusion settings for font family
 (microtype)             `futs' (encoding: T1).
 (microtype)             For optimal results, create family-specific settings.
 (microtype)             See the microtype manual for details.
 \c@lstlisting=\count353
 \AtBeginShipoutBox=\box83
 Package hyperref Info: Link coloring ON on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/nameref.sty
 Package: nameref 2016/05/21 v2.44 Cross-referencing by name of section
 
 (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/gettitlestring.sty
 Package: gettitlestring 2016/05/16 v1.5 Cleanup title references (HO)
 )
 \c@section@level=\count354
 )
 LaTeX Info: Redefining \ref on input line 18.
 LaTeX Info: Redefining \pageref on input line 18.
 LaTeX Info: Redefining \nameref on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/pdflscape.sty
 Package: pdflscape 2016/05/14 v0.11 Display of landscape pages in PDF (HO)
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/lscape.sty
 Package: lscape 2000/10/22 v3.01 Landscape Pages (DPC)
 )
 Package pdflscape Info: Auto-detected driver: pdftex on input line 81.
 )
 ABD: EveryShipout initializing macros
 (/usr/share/texlive/texmf-dist/tex/latex/translations/translations-basic-dictio
 nary-english.trsl
 File: translations-basic-dictionary-english.trsl (english translation file `tra
 nslations-basic-dictionary')
 )
 Package translations Info: loading dictionary `translations-basic-dictionary' f
 or `english'. on input line 18.
 .................................................
 . chemgreek info: "mapping-activated"
 . 
 . Activating mapping `fourier' on line 18.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \insitu with sig. 'O{}' on line 18.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \abinitio with sig. 'O{}' on line 18.
 .................................................
 .................................................
 . LaTeX info: "xparse/define-command"
 . 
 . Defining command \invacuo with sig. 'O{}' on line 18.
 .................................................
  (./head/titlepage.tex
 LaTeX Font Info:    Try loading font information for T1+lmss on input line 5.
 
 (/usr/share/texmf/tex/latex/lm/t1lmss.fd
 File: t1lmss.fd 2009/10/30 v1.6 Font defs for Latin Modern
 )
 Package microtype Info: Loading generic protrusion settings for font family
 (microtype)             `lmss' (encoding: T1).
 (microtype)             For optimal results, create family-specific settings.
 (microtype)             See the microtype manual for details.
 LaTeX Font Info:    Try loading font information for FML+futmi on input line 14
 .
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutmi.fd
 File: fmlfutmi.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futmi.
 )
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 10.07397pt on input line 14.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 7.63599pt on input line 14.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 5.51999pt on input line 14.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 10.07397pt on input line 14.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 7.63599pt on input line 14.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 5.51999pt on input line 14.
 LaTeX Font Info:    Try loading font information for U+msa on input line 14.
 
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsa.fd
 File: umsa.fd 2013/01/14 v3.01 AMS symbols A
 )
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msa.cfg
 File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS)
 )
 LaTeX Font Info:    Try loading font information for U+msb on input line 14.
 
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsb.fd
 File: umsb.fd 2013/01/14 v3.01 AMS symbols B
 )
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msb.cfg
 File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS)
 )
 <images/epfl.pdf, id=7, 612.2875pt x 265.99374pt>
 File: images/epfl.pdf Graphic file (type pdf)
 <use images/epfl.pdf>
 Package pdftex.def Info: images/epfl.pdf  used on input line 15.
 (pdftex.def)             Requested size: 113.81102pt x 49.4394pt.
 
 Overfull \hbox (23.99998pt too wide) in paragraph at lines 14--42
  [][] 
  []
 
 [1
 
 
 
 
 
 {/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} <./images/epfl.pdf>])
 \openout2 = `head/dedication.aux'.
 
 
 (./head/dedication.tex [2
 
 
 ]) [3]
 \openout2 = `head/acknowledgements.aux'.
 
  (./head/acknowledgements.tex [0
 
 
 
 ]
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <10.95> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 1.
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <24.88> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 1.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 22.88956pt on input line 1.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 15.89755pt on input line 1.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 11.03998pt on input line 1.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 22.88956pt on input line 1.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 15.89755pt on input line 1.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 11.03998pt on input line 1.
 )pdfTeX warning (ext4): destination with the same identifier (name{page.i}) has
  been already used, duplicate ignored
 <to be read again> 
                    \relax 
 l.25 \include{head/acknowledgements}
                                      [1]
 \openout2 = `head/abstracts.aux'.
 
  (./head/abstracts.texpdfTeX warning (ext4): destination with the same identifi
 er (name{page.ii}) has been already used, duplicate ignored
 <to be read again> 
                    \relax 
 l.6 \cleardoublepage
                      [2
 
 
 
 ]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
 pdfTeX warning (ext4): destination with the same identifier (name{page.iii}) ha
 s been already used, duplicate ignored
 <to be read again> 
                    \relax 
 l.27 
       [3
 
 ] [4]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [5
 
 
 ])
 [6] (./my_thesis.toc [7
 
 
 
 ] [8]
 Overfull \hbox (1.22647pt too wide) detected at line 117
 \T1/futs/m/n/10.95 7.10.10 
  []
 
 
 Overfull \hbox (1.22647pt too wide) detected at line 118
 \T1/futs/m/n/10.95 7.10.11 
  []
 
 [9])
 \tf@toc=\write4
 \openout4 = `my_thesis.toc'.
 
  [10]
 \openout2 = `main/ch_introduction.aux'.
 
  (./main/ch_introduction.tex
 Chapter 1.
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <14.4> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 13.
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <12> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 18.
 <images/ch_introduction/chromatin.png, id=316, 1047.112pt x 1426.128pt>
 File: images/ch_introduction/chromatin.png Graphic file (type png)
 <use images/ch_introduction/chromatin.png>
 Package pdftex.def Info: images/ch_introduction/chromatin.png  used on input li
 ne 23.
 (pdftex.def)             Requested size: 314.13602pt x 427.8417pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [1
 
 
 
 
 ]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [2]
 [3 <./images/ch_introduction/chromatin.png>]
 <images/ch_introduction/nucleosome_positioning.png, id=366, 1454.50488pt x 1152
 .33661pt>
 File: images/ch_introduction/nucleosome_positioning.png Graphic file (type png)
 
 <use images/ch_introduction/nucleosome_positioning.png>
 Package pdftex.def Info: images/ch_introduction/nucleosome_positioning.png  use
 d on input line 57.
 (pdftex.def)             Requested size: 290.89583pt x 230.46324pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [4]
 [5 <./images/ch_introduction/nucleosome_positioning.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [6]
 <images/ch_introduction/TF_associations.png, id=431, 667.9813pt x 381.48236pt>
 File: images/ch_introduction/TF_associations.png Graphic file (type png)
 <use images/ch_introduction/TF_associations.png>
 Package pdftex.def Info: images/ch_introduction/TF_associations.png  used on in
 put line 103.
 (pdftex.def)             Requested size: 267.18777pt x 152.59023pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [7]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [8 <./images/ch_introduction/TF_associations.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [9]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [10]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [11]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [12]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [13]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [14]
 <images/ch_introduction/dgf.png, id=648, 1138.67929pt x 1061.2878pt>
 File: images/ch_introduction/dgf.png Graphic file (type png)
 <use images/ch_introduction/dgf.png>
 Package pdftex.def Info: images/ch_introduction/dgf.png  used on input line 238
 .
 (pdftex.def)             Requested size: 341.60641pt x 318.3888pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [15]
 [16 <./images/ch_introduction/dgf.png>]
 LaTeX Font Info:    Try loading font information for TS1+futs on input line 273
 .
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/ts1futs.fd
 File: ts1futs.fd 2004/03/26 Fontinst v1.926 font definitions for TS1/futs.
 )
 Package microtype Info: Loading generic protrusion settings for font family
 (microtype)             `futs' (encoding: TS1).
 (microtype)             For optimal results, create family-specific settings.
 (microtype)             See the microtype manual for details.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [17]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [18]
 <images/ch_introduction/figure_pwm.png, id=749, 1452.627pt x 913.814pt>
 File: images/ch_introduction/figure_pwm.png Graphic file (type png)
 <use images/ch_introduction/figure_pwm.png>
 Package pdftex.def Info: images/ch_introduction/figure_pwm.png  used on input l
 ine 341.
 (pdftex.def)             Requested size: 290.52025pt x 182.75955pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [19]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [20 <./images/ch_introduction/figure_pwm.png>]
 <images/ch_introduction/shift_flip.png, id=786, 1417.295pt x 262.581pt>
 File: images/ch_introduction/shift_flip.png Graphic file (type png)
 <use images/ch_introduction/shift_flip.png>
 Package pdftex.def Info: images/ch_introduction/shift_flip.png  used on input l
 ine 394.
 (pdftex.def)             Requested size: 425.19179pt x 78.7749pt.
 
 Overfull \hbox (7.50533pt too wide) in paragraph at lines 394--395
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [21 <./images/ch_introduction/shift_flip.png>])
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [22]
 [23]
 \openout2 = `main/ch_lab_resources.aux'.
 
  (./main/ch_lab_resources.tex [24
 
 
 
 ]
 Chapter 2.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [25
 
 ]
 <images/ch_lab_resources/mga_figure1.jpeg, id=854, 500.28908pt x 167.6664pt>
 File: images/ch_lab_resources/mga_figure1.jpeg Graphic file (type jpg)
 <use images/ch_lab_resources/mga_figure1.jpeg>
 Package pdftex.def Info: images/ch_lab_resources/mga_figure1.jpeg  used on inpu
 t line 25.
 (pdftex.def)             Requested size: 400.23181pt x 134.13329pt.
 
 [26 <./images/ch_lab_resources/mga_figure1.jpeg>] [27]
 <images/ch_lab_resources/epd_figure1.jpeg, id=926, 239.03302pt x 194.22563pt>
 File: images/ch_lab_resources/epd_figure1.jpeg Graphic file (type jpg)
 <use images/ch_lab_resources/epd_figure1.jpeg>
 Package pdftex.def Info: images/ch_lab_resources/epd_figure1.jpeg  used on inpu
 t line 61.
 (pdftex.def)             Requested size: 215.12772pt x 174.80144pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [28
 
  <./images/ch_lab_resources/epd_figure1.jpeg>]
 <images/ch_lab_resources/epd_motifs.png, id=954, 1156.32pt x 578.16pt>
 File: images/ch_lab_resources/epd_motifs.png Graphic file (type png)
 <use images/ch_lab_resources/epd_motifs.png>
 Package pdftex.def Info: images/ch_lab_resources/epd_motifs.png  used on input 
 line 113.
 (pdftex.def)             Requested size: 346.89868pt x 173.44933pt.
  [29]
 Underfull \vbox (badness 2600) has occurred while \output is active []
 
  [30 <./images/ch_lab_resources/epd_motifs.png (PNG copy)>]) [31]
 \openout2 = `main/ch_encode_peaks.aux'.
 
 
 (./main/ch_encode_peaks.tex [32
 
 
 
 ]
 Chapter 3.
 <images/ch_encode_peaks/peaklist_peaknumber_GM12878.png, id=1012, 1734.48pt x 5
 78.16pt>
 File: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png Graphic file (type
  png)
 <use images/ch_encode_peaks/peaklist_peaknumber_GM12878.png>
 Package pdftex.def Info: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png
   used on input line 23.
 (pdftex.def)             Requested size: 520.34802pt x 173.44933pt.
 
 Overfull \hbox (102.66156pt too wide) in paragraph at lines 23--24
  [] 
  []
 
 <images/ch_encode_peaks/peaklist_proportions_GM12878.png, id=1017, 1734.48pt x 
 578.16pt>
 File: images/ch_encode_peaks/peaklist_proportions_GM12878.png Graphic file (typ
 e png)
 <use images/ch_encode_peaks/peaklist_proportions_GM12878.png>
 Package pdftex.def Info: images/ch_encode_peaks/peaklist_proportions_GM12878.pn
 g  used on input line 32.
 (pdftex.def)             Requested size: 520.34802pt x 173.44933pt.
 
 Overfull \hbox (102.66156pt too wide) in paragraph at lines 32--33
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [33
 
 ]
 [34 <./images/ch_encode_peaks/peaklist_peaknumber_GM12878.png (PNG copy)> <./im
 ages/ch_encode_peaks/peaklist_proportions_GM12878.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [35]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [36]
 <images/ch_encode_peaks/MNase_profiles.png, id=1085, 1508.63625pt x 1038.279pt>
 
 File: images/ch_encode_peaks/MNase_profiles.png Graphic file (type png)
 <use images/ch_encode_peaks/MNase_profiles.png>
 Package pdftex.def Info: images/ch_encode_peaks/MNase_profiles.png  used on inp
 ut line 103.
 (pdftex.def)             Requested size: 377.15814pt x 259.5691pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [37]
 [38 <./images/ch_encode_peaks/MNase_profiles.png>]
 <images/ch_encode_peaks/colocalization_ctcf.png, id=1120, 1612.8255pt x 1282.19
 025pt>
 File: images/ch_encode_peaks/colocalization_ctcf.png Graphic file (type png)
 <use images/ch_encode_peaks/colocalization_ctcf.png>
 Package pdftex.def Info: images/ch_encode_peaks/colocalization_ctcf.png  used o
 n input line 131.
 (pdftex.def)             Requested size: 403.20538pt x 320.54678pt.
 <images/ch_encode_peaks/CTCF_ndr_length_rad212.png, id=1121, 1011.78pt x 578.16
 pt>
 File: images/ch_encode_peaks/CTCF_ndr_length_rad212.png Graphic file (type png)
 
 <use images/ch_encode_peaks/CTCF_ndr_length_rad212.png>
 Package pdftex.def Info: images/ch_encode_peaks/CTCF_ndr_length_rad212.png  use
 d on input line 139.
 (pdftex.def)             Requested size: 404.70483pt x 231.2599pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [39]
 [40 <./images/ch_encode_peaks/colocalization_ctcf.png>] [41 <./images/ch_encode
 _peaks/CTCF_ndr_length_rad212.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [42]
 <images/ch_encode_peaks/ctcf_motif_association.png, id=1168, 1084.05pt x 847.96
 8pt>
 File: images/ch_encode_peaks/ctcf_motif_association.png Graphic file (type png)
 
 <use images/ch_encode_peaks/ctcf_motif_association.png>
 Package pdftex.def Info: images/ch_encode_peaks/ctcf_motif_association.png  use
 d on input line 177.
 (pdftex.def)             Requested size: 433.61232pt x 339.18118pt.
 
 Overfull \hbox (15.92586pt too wide) in paragraph at lines 177--178
  [] 
  []
 
 
 LaTeX Warning: Float too large for page by 31.83305pt on input line 233.
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [43]
 [44 <./images/ch_encode_peaks/ctcf_motif_association.png>] [45]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [46]
 <images/ch_encode_peaks/ebf1_haib_1.png, id=1246, 650.43pt x 867.24pt>
 File: images/ch_encode_peaks/ebf1_haib_1.png Graphic file (type png)
 <use images/ch_encode_peaks/ebf1_haib_1.png>
 Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_1.png  used on input 
 line 262.
 (pdftex.def)             Requested size: 260.16739pt x 346.88986pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [47]
 [48 <./images/ch_encode_peaks/ebf1_haib_1.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [49]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [50]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [51]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [52]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [53]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [54]
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <8> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 455.
 LaTeX Font Info:    Try loading font information for T1+lmtt on input line 455.
 
 
 (/usr/share/texmf/tex/latex/lm/t1lmtt.fd
 File: t1lmtt.fd 2009/10/30 v1.6 Font defs for Latin Modern
 )
 Package microtype Info: Loading generic protrusion settings for font family
 (microtype)             `lmtt' (encoding: T1).
 (microtype)             For optimal results, create family-specific settings.
 (microtype)             See the microtype manual for details.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [55]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [56]
 Overfull \hbox (9.9085pt too wide) in paragraph at lines 478--479
 \T1/futs/m/n/10.95 (-20) ences were the cor-rected EBF1 peaks (wgEn-codeAwgTf-b
 -sHaibGm12878Ebf1sc137065Pcr1xUniPk
  []
 
 )
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [57]
 [58]
 \openout2 = `main/ch_spark.aux'.
 
  (./main/ch_spark.tex
 Chapter 4.
 Package hyperref Info: bookmark level for unknown toc defaults to 0 on input li
 ne 5.
 
-Package natbib Warning: Citation `nielsen_catchprofiles' on page 59 undefined o
-n input line 13.
-
-
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [59
 
 
 
 
 ]
-<images/ch_spark/supplemental_figure1.pdf, id=1468, 1011.78pt x 505.89pt>
-File: images/ch_spark/supplemental_figure1.pdf Graphic file (type pdf)
-<use images/ch_spark/supplemental_figure1.pdf>
-Package pdftex.def Info: images/ch_spark/supplemental_figure1.pdf  used on inpu
+<images/ch_spark/supplemental_figure1.png, id=1471, 1011.78pt x 505.89pt>
+File: images/ch_spark/supplemental_figure1.png Graphic file (type png)
+<use images/ch_spark/supplemental_figure1.png>
+Package pdftex.def Info: images/ch_spark/supplemental_figure1.png  used on inpu
 t line 42.
 (pdftex.def)             Requested size: 404.70483pt x 202.3524pt.
-<images/ch_spark/supplemental_figure2.pdf, id=1469, 1156.32pt x 505.89pt>
-File: images/ch_spark/supplemental_figure2.pdf Graphic file (type pdf)
-<use images/ch_spark/supplemental_figure2.pdf>
-Package pdftex.def Info: images/ch_spark/supplemental_figure2.pdf  used on inpu
-t line 49.
+<images/ch_spark/supplemental_figure2.png, id=1476, 1156.32pt x 505.89pt>
+File: images/ch_spark/supplemental_figure2.png Graphic file (type png)
+<use images/ch_spark/supplemental_figure2.png>
+Package pdftex.def Info: images/ch_spark/supplemental_figure2.png  used on inpu
+t line 50.
 (pdftex.def)             Requested size: 462.5198pt x 202.3524pt.
-<images/ch_spark/supplemental_figure4.pdf, id=1474, 722.7pt x 433.62pt>
-File: images/ch_spark/supplemental_figure4.pdf Graphic file (type pdf)
-<use images/ch_spark/supplemental_figure4.pdf>
-Package pdftex.def Info: images/ch_spark/supplemental_figure4.pdf  used on inpu
-t line 57.
+<images/ch_spark/supplemental_figure4.png, id=1481, 722.7pt x 433.62pt>
+File: images/ch_spark/supplemental_figure4.png Graphic file (type png)
+<use images/ch_spark/supplemental_figure4.png>
+Package pdftex.def Info: images/ch_spark/supplemental_figure4.png  used on inpu
+t line 58.
 (pdftex.def)             Requested size: 289.07487pt x 173.44492pt.
-<images/ch_spark/supplemental_figure5.pdf, id=1479, 722.7pt x 433.62pt>
-File: images/ch_spark/supplemental_figure5.pdf Graphic file (type pdf)
-<use images/ch_spark/supplemental_figure5.pdf>
-Package pdftex.def Info: images/ch_spark/supplemental_figure5.pdf  used on inpu
-t line 65.
+<images/ch_spark/supplemental_figure5.png, id=1486, 722.7pt x 433.62pt>
+File: images/ch_spark/supplemental_figure5.png Graphic file (type png)
+<use images/ch_spark/supplemental_figure5.png>
+Package pdftex.def Info: images/ch_spark/supplemental_figure5.png  used on inpu
+t line 66.
 (pdftex.def)             Requested size: 289.07487pt x 173.44492pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [60]
-Overfull \hbox (24.71843pt too wide) in paragraph at lines 74--75
+[61 <./images/ch_spark/supplemental_figure1.png (PNG copy)>] [62 <./images/ch_s
+park/supplemental_figure2.png (PNG copy)>] [63 <./images/ch_spark/supplemental_
+figure4.png> <./images/ch_spark/supplemental_figure5.png (PNG copy)>]
+Overfull \hbox (24.71843pt too wide) in paragraph at lines 75--76
 [][]\T1/futs/m/n/10.95 (-20) 2006[][]) was used. Calls to kc-caFam-ily(dist=dis
 tEuclidean, cent=centMean) or kc-caFam-ily(dist=distCor,
  []
 
 
-Overfull \hbox (3.89964pt too wide) in paragraph at lines 77--78
+Overfull \hbox (3.89964pt too wide) in paragraph at lines 78--79
 \T1/futs/m/n/10.95 (-20) The im-ple-men-ta-tion was done in R pro-gram-ming lan
 -guage. The "em_shape", "em_shape_shift"
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [61 <./images/ch_spark/supplemental_figure1.pdf>] [62 <./images/ch_spark/suppl
-emental_figure2.pdf>] [63 <./images/ch_spark/supplemental_figure4.pdf> <./image
-s/ch_spark/supplemental_figure5.pdf>]
-Underfull \hbox (badness 10000) in paragraph at lines 89--90
+ [64]
+Underfull \hbox (badness 10000) in paragraph at lines 90--91
 
  []
 
-
-Underfull \vbox (badness 10000) has occurred while \output is active []
-
- [64]
-<images/ch_spark/supplemental_figure8.pdf, id=1562, 1011.78pt x 578.16pt>
-File: images/ch_spark/supplemental_figure8.pdf Graphic file (type pdf)
-<use images/ch_spark/supplemental_figure8.pdf>
-Package pdftex.def Info: images/ch_spark/supplemental_figure8.pdf  used on inpu
-t line 98.
+<images/ch_spark/figure1.png, id=1541, 1011.78pt x 578.16pt>
+File: images/ch_spark/figure1.png Graphic file (type png)
+<use images/ch_spark/figure1.png>
+Package pdftex.def Info: images/ch_spark/figure1.png  used on input line 99.
 (pdftex.def)             Requested size: 404.70483pt x 231.2599pt.
-<images/ch_spark/figure1.pdf, id=1567, 1011.78pt x 578.16pt>
-File: images/ch_spark/figure1.pdf Graphic file (type pdf)
-<use images/ch_spark/figure1.pdf>
-Package pdftex.def Info: images/ch_spark/figure1.pdf  used on input line 105.
+<images/ch_spark/supplemental_figure8.png, id=1546, 1011.78pt x 578.16pt>
+File: images/ch_spark/supplemental_figure8.png Graphic file (type png)
+<use images/ch_spark/supplemental_figure8.png>
+Package pdftex.def Info: images/ch_spark/supplemental_figure8.png  used on inpu
+t line 107.
 (pdftex.def)             Requested size: 404.70483pt x 231.2599pt.
-)
-[65] [66 <./images/ch_spark/supplemental_figure8.pdf>] [67 <./images/ch_spark/f
-igure1.pdf>]
-\openout2 = `main/ch_smile-seq.aux'.
-
- (./main/ch_smile-seq.tex [68
 
+Underfull \vbox (badness 10000) has occurred while \output is active []
 
+ [65]
+[66 <./images/ch_spark/figure1.png (PNG copy)>] [67 <./images/ch_spark/suppleme
+ntal_figure8.png (PNG copy)>]) [68]
+\openout2 = `main/ch_smile-seq.aux'.
 
-]
+ (./main/ch_smile-seq.tex
 Chapter 5.
-<images/ch_smile-seq/figure1.jpg, id=1622, 929.4725pt x 1206.5075pt>
+<images/ch_smile-seq/figure1.jpg, id=1591, 929.4725pt x 1206.5075pt>
 File: images/ch_smile-seq/figure1.jpg Graphic file (type jpg)
 <use images/ch_smile-seq/figure1.jpg>
 Package pdftex.def Info: images/ch_smile-seq/figure1.jpg  used on input line 23
 .
 (pdftex.def)             Requested size: 232.36755pt x 301.62613pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [69
 
+
+
+
 ]
 [70 <./images/ch_smile-seq/figure1.jpg>]
-<images/ch_smile-seq/figure_hmm.png, id=1651, 2081.16266pt x 1075.51627pt>
+<images/ch_smile-seq/figure_hmm.png, id=1621, 2081.16266pt x 1075.51627pt>
 File: images/ch_smile-seq/figure_hmm.png Graphic file (type png)
 <use images/ch_smile-seq/figure_hmm.png>
 Package pdftex.def Info: images/ch_smile-seq/figure_hmm.png  used on input line
  41.
 (pdftex.def)             Requested size: 416.22516pt x 215.09944pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [71 <./images/ch_smile-seq/figure_hmm.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [72]
-<images/ch_smile-seq/figure2b_3a.png, id=1678, 1994.652pt x 834.11626pt>
+<images/ch_smile-seq/figure2b_3a.png, id=1648, 1994.652pt x 834.11626pt>
 File: images/ch_smile-seq/figure2b_3a.png Graphic file (type png)
 <use images/ch_smile-seq/figure2b_3a.png>
 Package pdftex.def Info: images/ch_smile-seq/figure2b_3a.png  used on input lin
 e 118.
 (pdftex.def)             Requested size: 398.92334pt x 166.8203pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [73]
 [74 <./images/ch_smile-seq/figure2b_3a.png>]) [75]
 \openout2 = `main/ch_pwmscan.aux'.
 
  (./main/ch_pwmscan.tex
 [76
 
 
 
 ]
 Chapter 6.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [77
 
 ]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [78]
-<images/ch_lab_resources/pwmscan_flowchart.png, id=1728, 2792.031pt x 3705.042p
+<images/ch_lab_resources/pwmscan_flowchart.png, id=1698, 2792.031pt x 3705.042p
 t>
 File: images/ch_lab_resources/pwmscan_flowchart.png Graphic file (type png)
 <use images/ch_lab_resources/pwmscan_flowchart.png>
 Package pdftex.def Info: images/ch_lab_resources/pwmscan_flowchart.png  used on
  input line 51.
 (pdftex.def)             Requested size: 279.21945pt x 370.52591pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [79]
 [80 <./images/ch_lab_resources/pwmscan_flowchart.png>]
-<images/ch_lab_resources/pwmscan_figure_s1.png, id=1755, 1348.03625pt x 768.872
+<images/ch_lab_resources/pwmscan_figure_s1.png, id=1724, 1348.03625pt x 768.872
 5pt>
 File: images/ch_lab_resources/pwmscan_figure_s1.png Graphic file (type png)
 <use images/ch_lab_resources/pwmscan_figure_s1.png>
 Package pdftex.def Info: images/ch_lab_resources/pwmscan_figure_s1.png  used on
  input line 86.
 (pdftex.def)             Requested size: 269.60248pt x 153.77177pt.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 8.27998pt on input line 99.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 6.99199pt on input line 99.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 8.27998pt on input line 99.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 6.99199pt on input line 99.
 
 Underfull \vbox (badness 2875) has occurred while \output is active []
 
  [81 <./images/ch_lab_resources/pwmscan_figure_s1.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [82])
 [83]
 \openout2 = `main/ch_atac-seq.aux'.
 
  (./main/ch_atac-seq.tex [84
 
 
 
 ]
 Chapter 7.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [85
 
 ]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [86]
-<images/ch_atac-seq/em.png, id=1847, 2585.78645pt x 1933.64929pt>
+<images/ch_atac-seq/em.png, id=1817, 2585.78645pt x 1933.64929pt>
 File: images/ch_atac-seq/em.png Graphic file (type png)
 <use images/ch_atac-seq/em.png>
 Package pdftex.def Info: images/ch_atac-seq/em.png  used on input line 79.
 (pdftex.def)             Requested size: 465.4215pt x 348.04185pt.
 
 Overfull \hbox (47.73503pt too wide) in paragraph at lines 79--81
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [87]
 [88 <./images/ch_atac-seq/em.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [89]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [90]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [91]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [92]
-<images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png, id=1921, 1300.8
+<images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png, id=1891, 1300.8
 6pt x 1156.32pt>
 File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png Graphic fil
 e (type png)
 <use images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png>
 Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas
 s_2.png  used on input line 253.
 (pdftex.def)             Requested size: 455.30783pt x 404.71806pt.
 
 Overfull \hbox (37.62137pt too wide) in paragraph at lines 253--254
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [93]
 [94 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png (PNG copy)
 >]
-<images/ch_atac-seq/data_classCTCF_8class.png, id=1944, 1445.4pt x 722.7pt>
+<images/ch_atac-seq/data_classCTCF_8class.png, id=1914, 1445.4pt x 722.7pt>
 File: images/ch_atac-seq/data_classCTCF_8class.png Graphic file (type png)
 <use images/ch_atac-seq/data_classCTCF_8class.png>
 Package pdftex.def Info: images/ch_atac-seq/data_classCTCF_8class.png  used on 
 input line 280.
 (pdftex.def)             Requested size: 433.62335pt x 216.81166pt.
 
 Overfull \hbox (15.93689pt too wide) in paragraph at lines 280--281
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [95]
 [96 <./images/ch_atac-seq/data_classCTCF_8class.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [97]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [98]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [99]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [100]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [101]
 Overfull \vbox (45.20699pt too high) has occurred while \output is active []
 
 
 [102] [103] [104]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [105]
 Underfull \hbox (badness 3343) in paragraph at lines 511--511
 \T1/futs/m/n/10.95 (+20) FOSL2, JUNB, JUN::JUNB, FOSL1::JUND, FOS::JUN,
  []
 
 
 Overfull \hbox (5.93637pt too wide) in paragraph at lines 503--534
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [106]
 [107]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [108])
 [109]
 \openout2 = `main/ch_discussion.aux'.
 
  (./main/ch_discussion.tex [110
 
 
 
 ]
 Chapter 8.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [111
 
 ]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [112])
 [113] [114
 
 
 ]
 \openout2 = `tail/appendix.aux'.
 
  (./tail/appendix.tex
 Appendix A.
 <images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_allpeak
-s_EM_4class_15shift_flip.png, id=2149, 602.25pt x 903.375pt>
+s_EM_4class_15shift_flip.png, id=2119, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_al
 lpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_all
 peaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUni
 Pk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on input line 21.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [115
 
 
 ]
 Overfull \vbox (192.32838pt too high) has occurred while \output is active []
 
 
 [116 <./images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_
 allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
 <images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_GM12878
-_allpeaks_EM_4class_15shift_flip.png, id=2158, 602.25pt x 903.375pt>
+_allpeaks_EM_4class_15shift_flip.png, id=2128, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_GM
 12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_GM1
 2878_allpeaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1I
 ggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on input lin
 e 29.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 <images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM12878_allpe
-aks_EM_4class_15shift_flip.png, id=2159, 602.25pt x 903.375pt>
+aks_EM_4class_15shift_flip.png, id=2129, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM12878_
 allpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM12878_a
 llpeaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosU
 niPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on input line 37.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [117 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase
 _GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
 <images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_GM12878_
-allpeaks_EM_4class_15shift_flip.png, id=2164, 602.25pt x 903.375pt>
+allpeaks_EM_4class_15shift_flip.png, id=2134, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_GM1
 2878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_GM12
 878_allpeaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIg
 gmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on input line
  45.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [118 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM128
 78_allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
 <images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_MNase_GM
-12878_allpeaks_EM_4class_15shift_flip.png, id=2169, 602.25pt x 903.375pt>
+12878_allpeaks_EM_4class_15shift_flip.png, id=2139, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_MNa
 se_GM12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_MNas
 e_GM12878_allpeaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1
 a300IggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on inpu
 t line 53.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [119 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_
 GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
-<images/ch_encode_peaks/ctcf_ndr.png, id=2175, 433.62pt x 578.16pt>
+<images/ch_encode_peaks/ctcf_ndr.png, id=2145, 433.62pt x 578.16pt>
 File: images/ch_encode_peaks/ctcf_ndr.png Graphic file (type png)
 <use images/ch_encode_peaks/ctcf_ndr.png>
 Package pdftex.def Info: images/ch_encode_peaks/ctcf_ndr.png  used on input lin
 e 61.
 (pdftex.def)             Requested size: 346.89647pt x 462.52863pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [120 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_
 MNase_GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
-<images/ch_encode_peaks/jund_motif_association.png, id=2180, 1084.05pt x 847.96
+<images/ch_encode_peaks/jund_motif_association.png, id=2150, 1084.05pt x 847.96
 8pt>
 File: images/ch_encode_peaks/jund_motif_association.png Graphic file (type png)
 
 <use images/ch_encode_peaks/jund_motif_association.png>
 Package pdftex.def Info: images/ch_encode_peaks/jund_motif_association.png  use
 d on input line 69.
 (pdftex.def)             Requested size: 433.61232pt x 339.18118pt.
 
 Overfull \hbox (15.92586pt too wide) in paragraph at lines 69--70
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [121 <./images/ch_encode_peaks/ctcf_ndr.png (PNG copy)>]
-<images/ch_encode_peaks/ebf1_haib_3.png, id=2185, 650.43pt x 289.08pt>
+<images/ch_encode_peaks/ebf1_haib_3.png, id=2155, 650.43pt x 289.08pt>
 File: images/ch_encode_peaks/ebf1_haib_3.png Graphic file (type png)
 <use images/ch_encode_peaks/ebf1_haib_3.png>
 Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_3.png  used on input 
 line 77.
 (pdftex.def)             Requested size: 260.16739pt x 115.62994pt.
-<images/ch_encode_peaks/MA0154_3.png, id=2186, 722.7pt x 361.35pt>
+<images/ch_encode_peaks/MA0154_3.png, id=2156, 722.7pt x 361.35pt>
 File: images/ch_encode_peaks/MA0154_3.png Graphic file (type png)
 <use images/ch_encode_peaks/MA0154_3.png>
 Package pdftex.def Info: images/ch_encode_peaks/MA0154_3.png  used on input lin
 e 85.
 (pdftex.def)             Requested size: 361.3491pt x 180.67456pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [122 <./images/ch_encode_peaks/jund_motif_association.png>]
-<images/ch_encode_peaks/ebf1_haib_2.png, id=2197, 650.43pt x 867.24pt>
+<images/ch_encode_peaks/ebf1_haib_2.png, id=2166, 650.43pt x 867.24pt>
 File: images/ch_encode_peaks/ebf1_haib_2.png Graphic file (type png)
 <use images/ch_encode_peaks/ebf1_haib_2.png>
 Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_2.png  used on input 
 line 93.
 (pdftex.def)             Requested size: 260.16739pt x 346.88986pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [123 <./images/ch_encode_peaks/ebf1_haib_3.png (PNG copy)> <./images/ch_encode
 _peaks/MA0154_3.png>] [124 <./images/ch_encode_peaks/ebf1_haib_2.png (PNG copy)
 >]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [125]
 Overfull \vbox (122.1795pt too high) has occurred while \output is active []
 
 
 [126]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [127]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [128]
 Overfull \vbox (48.86317pt too high) has occurred while \output is active []
 
 
 [129]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [130]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [131]
 Overfull \vbox (102.86353pt too high) has occurred while \output is active []
 
 
 [132]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [133]
 Overfull \vbox (118.59161pt too high) has occurred while \output is active []
 
 
 [134]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [135]
 Overfull \vbox (103.50354pt too high) has occurred while \output is active []
 
 
 [136]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [137]
-<images/ch_smile-seq/figure_s4_reproduced.png, id=2528, 1413.53291pt x 783.5889
+<images/ch_smile-seq/figure_s4_reproduced.png, id=2497, 1413.53291pt x 783.5889
 pt>
 File: images/ch_smile-seq/figure_s4_reproduced.png Graphic file (type png)
 <use images/ch_smile-seq/figure_s4_reproduced.png>
 Package pdftex.def Info: images/ch_smile-seq/figure_s4_reproduced.png  used on 
 input line 526.
 (pdftex.def)             Requested size: 424.06316pt x 235.07848pt.
 
 Overfull \hbox (6.3767pt too wide) in paragraph at lines 526--527
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [138]
-<images/ch_atac-seq/fragment_lengths.png, id=2561, 1445.4pt x 433.62pt>
+<images/ch_atac-seq/fragment_lengths.png, id=2531, 1445.4pt x 433.62pt>
 File: images/ch_atac-seq/fragment_lengths.png Graphic file (type png)
 <use images/ch_atac-seq/fragment_lengths.png>
 Package pdftex.def Info: images/ch_atac-seq/fragment_lengths.png  used on input
  line 541.
 (pdftex.def)             Requested size: 433.62335pt x 130.087pt.
 
 Overfull \hbox (15.93689pt too wide) in paragraph at lines 541--542
  [] 
  []
 
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
-<images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png, id=2562, 867.24pt x 650
+<images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png, id=2532, 867.24pt x 650
 .43pt>
 File: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png Graphic file (type 
 png)
 <use images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png>
 Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png 
  used on input line 549.
 (pdftex.def)             Requested size: 346.88986pt x 260.16739pt.
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
 [139 <./images/ch_smile-seq/figure_s4_reproduced.png>]
-<images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png, id=2576, 1300.86pt x 650.4
+<images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png, id=2546, 1300.86pt x 650.4
 3pt>
 File: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png Graphic file (type png
 )
 <use images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png>
 Package pdftex.def Info: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png  us
 ed on input line 568.
 (pdftex.def)             Requested size: 390.26102pt x 195.1305pt.
 
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [140 <./images/ch_atac-seq/fragment_lengths.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [141 <./images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [142 <./images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png (PNG copy)>]
-<images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png, id=2611, 5
+<images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png, id=2581, 5
 05.89pt x 578.16pt>
 File: images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png Graphi
 c file (type png)
 <use images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png>
 Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_bes
 t_motifs.png  used on input line 607.
 (pdftex.def)             Requested size: 202.3524pt x 231.2599pt.
 
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
-<images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png, id=2616, 867.2
+<images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png, id=2586, 867.2
 4pt x 433.62pt>
 File: images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png Graphic fi
 le (type png)
 <use images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png>
 Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_auc
 _roc.png  used on input line 615.
 (pdftex.def)             Requested size: 346.88986pt x 173.44492pt.
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
-<images/ch_atac-seq/sp1_motifs_7class.png, id=2617, 1300.86pt x 867.24pt>
+<images/ch_atac-seq/sp1_motifs_7class.png, id=2587, 1300.86pt x 867.24pt>
 File: images/ch_atac-seq/sp1_motifs_7class.png Graphic file (type png)
 <use images/ch_atac-seq/sp1_motifs_7class.png>
 Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_7class.png  used on inpu
 t line 623.
 (pdftex.def)             Requested size: 455.30783pt x 303.53854pt.
 
 Overfull \hbox (37.62137pt too wide) in paragraph at lines 623--624
  [] 
  []
 
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
-<images/ch_atac-seq/sp1_motifs_10class.png, id=2618, 1300.86pt x 867.24pt>
+<images/ch_atac-seq/sp1_motifs_10class.png, id=2588, 1300.86pt x 867.24pt>
 File: images/ch_atac-seq/sp1_motifs_10class.png Graphic file (type png)
 <use images/ch_atac-seq/sp1_motifs_10class.png>
 Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_10class.png  used on inp
 ut line 631.
 (pdftex.def)             Requested size: 455.30783pt x 303.53854pt.
 
 Overfull \hbox (37.62137pt too wide) in paragraph at lines 631--632
  [] 
  []
 
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [143]
 Underfull \vbox (badness 2088) has occurred while \output is active []
 
  [144 <./images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png (PN
 G copy)>]
 [145 <./images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png (PNG cop
 y)>]
-<images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png, id=2651, 1300.86pt x 5
+<images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png, id=2620, 1300.86pt x 5
 78.16pt>
 File: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png Graphic file (type
  png)
 <use images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png>
 Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png
   used on input line 653.
 (pdftex.def)             Requested size: 390.26102pt x 173.44933pt.
 
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
-<images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png, id=2652, 1300.86pt x 57
+<images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png, id=2621, 1300.86pt x 57
 8.16pt>
 File: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png Graphic file (type 
 png)
 <use images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png>
 Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png 
  used on input line 661.
 (pdftex.def)             Requested size: 390.26102pt x 173.44933pt.
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
-<images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png, id=2653, 1300.86pt x 578
+<images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png, id=2622, 1300.86pt x 578
 .16pt>
 File: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png Graphic file (type p
 ng)
 <use images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png>
 Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png  
 used on input line 669.
 (pdftex.def)             Requested size: 455.30783pt x 202.35902pt.
 
 Overfull \hbox (37.62137pt too wide) in paragraph at lines 669--670
  [] 
  []
 
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
-<images/ch_atac-seq/sp1_motifs_6class_shift_flip.png, id=2654, 1300.86pt x 578.
+<images/ch_atac-seq/sp1_motifs_6class_shift_flip.png, id=2623, 1300.86pt x 578.
 16pt>
 File: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png Graphic file (type pn
 g)
 <use images/ch_atac-seq/sp1_motifs_6class_shift_flip.png>
 Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png  u
 sed on input line 677.
 (pdftex.def)             Requested size: 390.26102pt x 173.44933pt.
 
 LaTeX Warning: `h' float specifier changed to `ht'.
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [146 <./images/ch_atac-seq/sp1_motifs_7class.png (PNG copy)>]
 Underfull \vbox (badness 4341) has occurred while \output is active []
 
  [147 <./images/ch_atac-seq/sp1_motifs_10class.png (PNG copy)>]
-<images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png, id=2678, 1300.86p
+<images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png, id=2648, 1300.86p
 t x 1156.32pt>
 File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png Graphic file 
 (type png)
 <use images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png>
 Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas
 s.png  used on input line 698.
 (pdftex.def)             Requested size: 455.30783pt x 404.71806pt.
 
 Overfull \hbox (37.62137pt too wide) in paragraph at lines 698--699
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [148 <./images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png (PNG copy)>]
 Overfull \vbox (211.22089pt too high) has occurred while \output is active []
 
 
 [149 <./images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png (PNG copy)> <./im
 ages/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png (PNG copy)>]
-<images/ch_atac-seq/data_classPU1_2class.png, id=2688, 1445.4pt x 722.7pt>
+<images/ch_atac-seq/data_classPU1_2class.png, id=2658, 1445.4pt x 722.7pt>
 File: images/ch_atac-seq/data_classPU1_2class.png Graphic file (type png)
 <use images/ch_atac-seq/data_classPU1_2class.png>
 Package pdftex.def Info: images/ch_atac-seq/data_classPU1_2class.png  used on i
 nput line 706.
 (pdftex.def)             Requested size: 433.62335pt x 216.81166pt.
 
 Overfull \hbox (15.93689pt too wide) in paragraph at lines 706--707
  [] 
  []
 
 
 Overfull \vbox (26.60411pt too high) has occurred while \output is active []
 
 
 [150 <./images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png (PNG copy)> <./ima
 ges/ch_atac-seq/data_classPU1_2class.png (PNG copy)>]
-<images/ch_atac-seq/data_classjun_3class.png, id=2693, 1445.4pt x 722.7pt>
+<images/ch_atac-seq/data_classjun_3class.png, id=2663, 1445.4pt x 722.7pt>
 File: images/ch_atac-seq/data_classjun_3class.png Graphic file (type png)
 <use images/ch_atac-seq/data_classjun_3class.png>
 Package pdftex.def Info: images/ch_atac-seq/data_classjun_3class.png  used on i
 nput line 714.
 (pdftex.def)             Requested size: 433.62335pt x 216.81166pt.
 
 Overfull \hbox (15.93689pt too wide) in paragraph at lines 714--715
  [] 
  []
 
 ) [151 <./images/ch_atac-seq/sp1_motifs_6class_shift_flip.png (PNG copy)> <./im
 ages/ch_atac-seq/data_classjun_3class.png (PNG copy)>] [152
 
 
 ]
 \openout2 = `tail/biblio.aux'.
 
  (./tail/biblio.tex (./my_thesis.bbl [153
 
 
 
 ] [154] [155] [156] [157] [158] [159] [160] [161]
 [162] [163] [164])) [165]
 \openout2 = `tail/cv.aux'.
 
  (./tail/cv.tex [166
 
 
 
 ]
-<tail/cv_en.pdf, id=2762, 597.50786pt x 845.01837pt>
+<tail/cv_en.pdf, id=2732, 597.50786pt x 845.01837pt>
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf>
 Package pdftex.def Info: tail/cv_en.pdf  used on input line 6.
 (pdftex.def)             Requested size: 597.5064pt x 845.01631pt.
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf>
 Package pdftex.def Info: tail/cv_en.pdf  used on input line 6.
 (pdftex.def)             Requested size: 597.5064pt x 845.01631pt.
-<tail/cv_en.pdf, id=2765, page=1, 597.50786pt x 845.01837pt>
+<tail/cv_en.pdf, id=2735, page=1, 597.50786pt x 845.01837pt>
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 1>
 Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.5064pt x 845.01631pt.
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 1>
 Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 1>
 Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 1>
 Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 1>
 Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
  [167 <./tail/cv_en.pdf>]
-<tail/cv_en.pdf, id=2788, page=2, 597.50786pt x 845.01837pt>
+<tail/cv_en.pdf, id=2758, page=2, 597.50786pt x 845.01837pt>
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 2>
 Package pdftex.def Info: tail/cv_en.pdf , page2 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 2>
 Package pdftex.def Info: tail/cv_en.pdf , page2 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 2>
 Package pdftex.def Info: tail/cv_en.pdf , page2 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
 
 [168 <./tail/cv_en.pdf>]
-<tail/cv_en.pdf, id=2794, page=3, 597.50786pt x 845.01837pt>
+<tail/cv_en.pdf, id=2764, page=3, 597.50786pt x 845.01837pt>
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 3>
 Package pdftex.def Info: tail/cv_en.pdf , page3 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 3>
 Package pdftex.def Info: tail/cv_en.pdf , page3 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
 File: tail/cv_en.pdf Graphic file (type pdf)
 <use tail/cv_en.pdf, page 3>
 Package pdftex.def Info: tail/cv_en.pdf , page3 used on input line 6.
 (pdftex.def)             Requested size: 597.53374pt x 845.055pt.
  [169 <./tail/cv_en.pdf>])
-
-Package natbib Warning: There were undefined citations.
-
 Package atveryend Info: Empty hook `BeforeClearDocument' on input line 80.
 Package atveryend Info: Empty hook `AfterLastShipout' on input line 80.
-(./my_thesis.aux (./head/dedication.aux) (./head/acknowledgements.aux)
-(./head/abstracts.aux) (./main/ch_introduction.aux)
-(./main/ch_lab_resources.aux) (./main/ch_encode_peaks.aux) (./main/ch_spark.aux
-) (./main/ch_smile-seq.aux) (./main/ch_pwmscan.aux) (./main/ch_atac-seq.aux)
-(./main/ch_discussion.aux) (./tail/appendix.aux) (./tail/biblio.aux)
-(./tail/cv.aux))
+ (./my_thesis.aux
+(./head/dedication.aux) (./head/acknowledgements.aux) (./head/abstracts.aux)
+(./main/ch_introduction.aux) (./main/ch_lab_resources.aux)
+(./main/ch_encode_peaks.aux) (./main/ch_spark.aux) (./main/ch_smile-seq.aux)
+(./main/ch_pwmscan.aux) (./main/ch_atac-seq.aux) (./main/ch_discussion.aux)
+(./tail/appendix.aux) (./tail/biblio.aux) (./tail/cv.aux))
 Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 80.
 Package atveryend Info: Empty hook `AtEndAfterFileList' on input line 80.
 
 
 LaTeX Warning: There were multiply-defined labels.
 
 Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 80.
  ) 
 Here is how much of TeX's memory you used:
- 42063 strings out of 492982
- 820563 string characters out of 6134895
- 1057797 words of memory out of 5000000
- 43568 multiletter control sequences out of 15000+600000
+ 42062 strings out of 492982
+ 820396 string characters out of 6134895
+ 1063875 words of memory out of 5000000
+ 43567 multiletter control sequences out of 15000+600000
  732775 words of font info for 397 fonts, out of 8000000 for 9000
  1141 hyphenation exceptions out of 8191
  73i,24n,99p,10424b,1319s stack positions out of 5000i,500n,10000p,200000b,80000s
 {/usr/share/texmf/fonts/enc/dvips/lm/lm-ec.enc}{/usr/share/texlive/texmf-dist
 /fonts/enc/dvips/base/8r.enc}</usr/share/texlive/texmf-dist/fonts/type1/public/
 fourier/fourier-mcl.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/fouri
 er/fourier-mex.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/fourier/fo
 urier-ml.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/fourier/fourier-
 mlit.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/fourier/fourier-ms.p
 fb></usr/share/texmf/fonts/type1/public/lm/lmss10.pfb></usr/share/texmf/fonts/t
 ype1/public/lm/lmss17.pfb></usr/share/texmf/fonts/type1/public/lm/lmtt10.pfb></
 usr/share/texlive/texmf-dist/fonts/type1/adobe/utopia/putb8a.pfb></usr/share/te
 xlive/texmf-dist/fonts/type1/adobe/utopia/putr8a.pfb></usr/share/texlive/texmf-
 dist/fonts/type1/adobe/utopia/putri8a.pfb>
-Output written on my_thesis.pdf (183 pages, 108761454 bytes).
+Output written on my_thesis.pdf (183 pages, 81807506 bytes).
 PDF statistics:
- 3405 PDF objects out of 3580 (max. 8388607)
- 3013 compressed objects within 31 object streams
+ 3374 PDF objects out of 3580 (max. 8388607)
+ 2984 compressed objects within 30 object streams
  905 named destinations out of 1000 (max. 500000)
  38683 words of extra memory for PDF output out of 42996 (max. 10000000)
 
diff --git a/my_thesis.pdf b/my_thesis.pdf
index 5340b2f..b7b9469 100644
Binary files a/my_thesis.pdf and b/my_thesis.pdf differ
diff --git a/my_thesis.synctex.gz b/my_thesis.synctex.gz
index e03e105..d4ae83c 100644
Binary files a/my_thesis.synctex.gz and b/my_thesis.synctex.gz differ
diff --git a/my_thesis.toc b/my_thesis.toc
index 7444e6c..313d57d 100644
--- a/my_thesis.toc
+++ b/my_thesis.toc
@@ -1,135 +1,135 @@
 \babel@toc {english}{}
 \babel@toc {french}{}
 \babel@toc {english}{}
 \contentsline {chapter}{Acknowledgements}{i}{chapter*.1}
 \contentsline {chapter}{Abstract (English/Fran\IeC {\c c}ais/Deutsch)}{iii}{chapter*.2}
 \babel@toc {french}{}
 \babel@toc {english}{}
 \contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}
 \contentsline {chapter}{Introduction}{1}{chapter.1}
 \contentsline {section}{\numberline {1.1}About chromatin}{1}{section.1.1}
 \contentsline {subsection}{\numberline {1.1.1}The chromatin structure}{2}{subsection.1.1.1}
 \contentsline {subsection}{\numberline {1.1.2}The chromatin is dynamic}{2}{subsection.1.1.2}
 \contentsline {subsection}{\numberline {1.1.3}About nucleosome positioning}{4}{subsection.1.1.3}
 \contentsline {section}{\numberline {1.2}About transcription factors}{7}{section.1.2}
 \contentsline {subsection}{\numberline {1.2.1}TF co-binding}{7}{subsection.1.2.1}
 \contentsline {section}{\numberline {1.3}Gene regulation in a nutshell}{9}{section.1.3}
 \contentsline {subsection}{\numberline {1.3.1}The chromatin barrier}{9}{subsection.1.3.1}
 \contentsline {subsection}{\numberline {1.3.2}TFs cooperative binding}{9}{subsection.1.3.2}
 \contentsline {subsection}{\numberline {1.3.3}Pioneer TFs}{10}{subsection.1.3.3}
 \contentsline {subsection}{\numberline {1.3.4}Regulatory elements}{10}{subsection.1.3.4}
 \contentsline {subsection}{\numberline {1.3.5}The genome goes 3D}{11}{subsection.1.3.5}
 \contentsline {section}{\numberline {1.4}Measuring chromatin features}{12}{section.1.4}
 \contentsline {subsection}{\numberline {1.4.1}Measuring TF binding in vivo}{12}{subsection.1.4.1}
 \contentsline {subsection}{\numberline {1.4.2}Measuring TF binding in vitro}{13}{subsection.1.4.2}
 \contentsline {subsection}{\numberline {1.4.3}Measuring nucleosome occupancy}{14}{subsection.1.4.3}
 \contentsline {subsection}{\numberline {1.4.4}Digital footprinting}{15}{subsection.1.4.4}
 \contentsline {section}{\numberline {1.5}Modeling sequence specificity}{17}{section.1.5}
 \contentsline {subsubsection}{The physics approach to PWMs}{17}{section.1.5}
 \contentsline {subsubsection}{The statistical mechanic approach to PWMs}{18}{equation.1.5.2}
 \contentsline {subsection}{\numberline {1.5.1}Aligning binding sites}{19}{subsection.1.5.1}
 \contentsline {subsection}{\numberline {1.5.2}Platitudes}{20}{subsection.1.5.2}
 \contentsline {subsection}{\numberline {1.5.3}Predicting binding sites}{20}{subsection.1.5.3}
 \contentsline {section}{\numberline {1.6}Over-represented patterns discovery}{21}{section.1.6}
 \contentsline {chapter}{\numberline {2}Laboratory resources}{25}{chapter.2}
 \contentsline {chapter}{Laboratory resources}{25}{chapter.2}
 \contentsline {section}{\numberline {2.1}Mass Genome Annotation repository}{25}{section.2.1}
 \contentsline {subsection}{\numberline {2.1.1}MGA content and organization}{26}{subsection.2.1.1}
 \contentsline {subsection}{\numberline {2.1.2}Conclusions}{27}{subsection.2.1.2}
 \contentsline {section}{\numberline {2.2}Eukaryotic Promoter Database}{28}{section.2.2}
 \contentsline {subsection}{\numberline {2.2.1}EPDnew now annotates (some of) your mushrooms and vegetables}{29}{subsection.2.2.1}
 \contentsline {subsection}{\numberline {2.2.2}Increased mapping precision in human}{30}{subsection.2.2.2}
 \contentsline {subsection}{\numberline {2.2.3}Integration of EPDnew with other resources}{30}{subsection.2.2.3}
 \contentsline {subsection}{\numberline {2.2.4}Conclusions}{31}{subsection.2.2.4}
 \contentsline {subsection}{\numberline {2.2.5}Methods}{31}{subsection.2.2.5}
 \contentsline {subsubsection}{Motif occurrence profiles}{31}{subsection.2.2.5}
 \contentsline {chapter}{\numberline {3}ENCODE peaks analysis}{33}{chapter.3}
 \contentsline {chapter}{ENCODE peaks analysis}{33}{chapter.3}
 \contentsline {section}{\numberline {3.1}Data}{33}{section.3.1}
 \contentsline {section}{\numberline {3.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{35}{section.3.2}
 \contentsline {subsection}{\numberline {3.2.1}Data realignment}{36}{subsection.3.2.1}
 \contentsline {section}{\numberline {3.3}Nucleosome organization around transcription factor binding sites}{37}{section.3.3}
-\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{39}{section.3.4}
+\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{42}{section.3.4}
 \contentsline {section}{\numberline {3.5}CTCF and JunD interactomes}{43}{section.3.5}
 \contentsline {section}{\numberline {3.6}EBF1 binds nucleosomes}{47}{section.3.6}
 \contentsline {section}{\numberline {3.7}Discussion}{50}{section.3.7}
 \contentsline {section}{\numberline {3.8}Methods}{50}{section.3.8}
 \contentsline {subsection}{\numberline {3.8.1}Data and data processing}{50}{subsection.3.8.1}
 \contentsline {subsection}{\numberline {3.8.2}Classification of MNase patterns}{51}{subsection.3.8.2}
 \contentsline {subsection}{\numberline {3.8.3}Quantifying nucleosome array intensity from classification results}{52}{subsection.3.8.3}
 \contentsline {subsection}{\numberline {3.8.4}Peak colocalization}{53}{subsection.3.8.4}
 \contentsline {subsection}{\numberline {3.8.5}NDR detection}{54}{subsection.3.8.5}
 \contentsline {subsection}{\numberline {3.8.6}CTCF and JunD interactors}{56}{subsection.3.8.6}
 \contentsline {subsection}{\numberline {3.8.7}EBF1 and nucleosome}{57}{subsection.3.8.7}
 \contentsline {chapter}{\numberline {4}SPar-K}{59}{chapter.4}
 \contentsline {section}{\numberline {4.1}Algorithm}{59}{section.4.1}
 \contentsline {section}{\numberline {4.2}Implementation}{60}{section.4.2}
-\contentsline {section}{\numberline {4.3}Benchmarking}{61}{section.4.3}
-\contentsline {subsection}{\numberline {4.3.1}K-means}{61}{subsection.4.3.1}
+\contentsline {section}{\numberline {4.3}Benchmarking}{64}{section.4.3}
+\contentsline {subsection}{\numberline {4.3.1}K-means}{64}{subsection.4.3.1}
 \contentsline {subsection}{\numberline {4.3.2}ChIPPartitioning}{64}{subsection.4.3.2}
 \contentsline {subsection}{\numberline {4.3.3}Data}{64}{subsection.4.3.3}
 \contentsline {subsection}{\numberline {4.3.4}Performances}{65}{subsection.4.3.4}
 \contentsline {section}{\numberline {4.4}Partition of DNase and MNase data}{65}{section.4.4}
-\contentsline {section}{\numberline {4.5}Conclusions}{65}{section.4.5}
+\contentsline {section}{\numberline {4.5}Conclusions}{68}{section.4.5}
 \contentsline {chapter}{\numberline {5}SMiLE-seq data analysis}{69}{chapter.5}
 \contentsline {chapter}{SMiLE-seq data analysis}{69}{chapter.5}
 \contentsline {section}{\numberline {5.1}Introduction}{69}{section.5.1}
 \contentsline {section}{\numberline {5.2}Hidden Markov Model Motif discovery}{71}{section.5.2}
 \contentsline {section}{\numberline {5.3}Binding motif evaluation}{72}{section.5.3}
 \contentsline {section}{\numberline {5.4}Results}{73}{section.5.4}
 \contentsline {section}{\numberline {5.5}Conclusions}{75}{section.5.5}
 \contentsline {chapter}{\numberline {6}PWMScan}{77}{chapter.6}
 \contentsline {section}{\numberline {6.1}Algorithms}{77}{section.6.1}
 \contentsline {subsection}{\numberline {6.1.1}Scanner algorithm}{78}{subsection.6.1.1}
 \contentsline {subsection}{\numberline {6.1.2}Matches enumeration and mapping}{78}{subsection.6.1.2}
 \contentsline {section}{\numberline {6.2}PMWScan architecture}{79}{section.6.2}
 \contentsline {section}{\numberline {6.3}Benchmark}{81}{section.6.3}
 \contentsline {section}{\numberline {6.4}Conclusions}{83}{section.6.4}
 \contentsline {chapter}{\numberline {7}Chromatin accessibility of monocytes}{85}{chapter.7}
 \contentsline {section}{\numberline {7.1}Monitoring TF binding}{85}{section.7.1}
 \contentsline {section}{\numberline {7.2}The advent of single cell DGF}{86}{section.7.2}
 \contentsline {section}{\numberline {7.3}Open issues}{86}{section.7.3}
 \contentsline {section}{\numberline {7.4}Data}{86}{section.7.4}
 \contentsline {section}{\numberline {7.5}Identifying over-represented signals}{87}{section.7.5}
 \contentsline {subsection}{\numberline {7.5.1}ChIPPartitioning algorithm}{87}{subsection.7.5.1}
 \contentsline {subsection}{\numberline {7.5.2}EMSequence algorithm}{87}{subsection.7.5.2}
 \contentsline {subsubsection}{without shift and flip}{89}{figure.caption.35}
 \contentsline {subsubsection}{with shift and flip}{89}{equation.7.5.2}
 \contentsline {subsection}{\numberline {7.5.3}EMJoint algorithm}{91}{subsection.7.5.3}
 \contentsline {subsection}{\numberline {7.5.4}Data realignment}{92}{subsection.7.5.4}
 \contentsline {subsection}{\numberline {7.5.5}Soft aggregation plots}{92}{subsection.7.5.5}
 \contentsline {section}{\numberline {7.6}Data processing}{93}{section.7.6}
 \contentsline {section}{\numberline {7.7}Results}{93}{section.7.7}
 \contentsline {subsection}{\numberline {7.7.1}Aligning the binding sites}{93}{subsection.7.7.1}
 \contentsline {subsection}{\numberline {7.7.2}Exploring individual TF classes}{95}{subsection.7.7.2}
 \contentsline {section}{\numberline {7.8}Discussions}{97}{section.7.8}
 \contentsline {section}{\numberline {7.9}Perspectives}{97}{section.7.9}
 \contentsline {section}{\numberline {7.10}Methods}{98}{section.7.10}
 \contentsline {subsection}{\numberline {7.10.1}Code availability}{98}{subsection.7.10.1}
 \contentsline {subsection}{\numberline {7.10.2}Data sources}{99}{subsection.7.10.2}
 \contentsline {subsection}{\numberline {7.10.3}Data post-processing}{99}{subsection.7.10.3}
 \contentsline {subsection}{\numberline {7.10.4}Model extension}{100}{subsection.7.10.4}
 \contentsline {subsection}{\numberline {7.10.5}Extracting data assigned to a class}{100}{subsection.7.10.5}
 \contentsline {subsection}{\numberline {7.10.6}Programs}{103}{subsection.7.10.6}
 \contentsline {subsection}{\numberline {7.10.7}Fragment classes}{104}{subsection.7.10.7}
 \contentsline {subsection}{\numberline {7.10.8}Simulated sequences}{105}{subsection.7.10.8}
 \contentsline {subsection}{\numberline {7.10.9}Binding site prediction}{105}{subsection.7.10.9}
 \contentsline {subsection}{\numberline {7.10.10}Realignment using JASPAR motifs}{106}{subsection.7.10.10}
 \contentsline {subsection}{\numberline {7.10.11}Per TF sub-classes}{108}{subsection.7.10.11}
 \contentsline {chapter}{\numberline {8}Discussion}{111}{chapter.8}
 \contentsline {chapter}{Discussions}{111}{chapter.8}
 \vspace {\normalbaselineskip }
 \contentsline {chapter}{\numberline {A}Supplementary material}{115}{appendix.A}
 \contentsline {section}{\numberline {A.1}ENCODE peaks analysis supplementary material}{116}{section.A.1}
 \contentsline {section}{\numberline {A.2}SPar-K supplementary material}{126}{section.A.2}
 \contentsline {section}{\numberline {A.3}SMiLE-seq supplementary material}{139}{section.A.3}
 \contentsline {section}{\numberline {A.4}Chromatin accessibility of monocytes supplementary material}{139}{section.A.4}
 \contentsline {subsection}{\numberline {A.4.1}Fragment size analysis}{139}{subsection.A.4.1}
 \contentsline {subsection}{\numberline {A.4.2}Measuring open chromatin and nucleosome occupancy}{140}{subsection.A.4.2}
 \contentsline {subsection}{\numberline {A.4.3}Evaluation of EMSequence and ChIPPartitioning}{143}{subsection.A.4.3}
 \contentsline {subsubsection}{EMSequence}{143}{subsection.A.4.3}
 \contentsline {subsubsection}{ChIPPartitioning}{146}{figure.caption.56}
 \contentsline {subsection}{\numberline {A.4.4}Other supplementary figures}{149}{subsection.A.4.4}
 \contentsline {chapter}{Bibliography}{153}{section*.64}
 \contentsline {chapter}{Bibliography}{165}{appendix*.65}
 \contentsline {chapter}{Curriculum Vitae}{167}{section*.66}
diff --git a/scripts/ch_atac-seq/figure_ctcf_6classes.R b/scripts/ch_atac-seq/figure_ctcf_6classes.R
index ce64495..50025c6 100644
--- a/scripts/ch_atac-seq/figure_ctcf_6classes.R
+++ b/scripts/ch_atac-seq/figure_ctcf_6classes.R
@@ -1,172 +1,172 @@
 
 setwd(file.path("/", "local", "groux", "scATAC-seq"))
 
 # libraries
 library(RColorBrewer)
 
 # functions
 source(file.path("scripts", "functions.R"))
 
 # path to the images for the logo
 path.a = file.path("res/A.png")
 path.c = file.path("res/C.png")
 path.g = file.path("res/G.png")
 path.t = file.path("res/T.png") 
 
 # paths
 data.dir = file.path("/", "local", "groux", "scATAC-seq", "results")
 dest.dir = file.path("/", "local", "groux", "phd_thesis", "scATAC-seq", "figures")
 
 # colors
 col = brewer.pal(3, "Set1")
 
 
 ##################################### ctcf with flip only ##################################### 
 # open chromatin
 data       = read.read.models(file.path(data.dir,
                                         "10xgenomics_PBMC_5k_motifs_classification_0", 
                                         "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_model.mat"))
 model.open = data$models
 model.prob = data$prob
 data = NULL
 # nucleosomes
 model.nucl = read.read.models(file.path(data.dir,
                                         "10xgenomics_PBMC_5k_motifs_classification_0",
                                         "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_nucleosomes_fragment_center_model.mat"))$models
 # sequence
 model.seq  = read.sequence.models(file.path(data.dir,
                                             "10xgenomics_PBMC_5k_motifs_classification_0",
                                             "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_sequences_model.mat"))$models
 
 png(filename=file.path(dest.dir,
                        "ctcf_motifs_6class_noshift_flip.png"),
-    units="in", res=720, width=18, height=8)
+    units="in", res=480, width=18, height=8)
 
   m = matrix(1:6, nrow=3, ncol=2, byrow=F)
   layout(m)
   # order from most to least probable class
   ord      = order(model.prob, decreasing=T)
   ref.open = model.open[ord,, drop=F]
   ref.nucl = model.nucl[ord,, drop=F]
   ref.seq  = model.seq[,,ord, drop=F]
   prob     = model.prob[ord]
   class    = c(1:nrow(ref.open))[ord]
   for(i in 1:nrow(ref.open))
   { # plot logo
     plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
               main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
     # x-axis
     x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3)
     x.at  = (x.lab + ncol(ref.open)) / 2
     axis(1, at=x.at, labels=x.lab)
     # y-axis is [0,2] because these are bits but
     # label it [0,1] for min/max signal
     x.at = seq(0, 2, 1)
     axis(2, at=x.at, labels=0.5*x.at)
     # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
     lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=2, col=col[1])
     lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=2, col=col[2])
   }
   row_n = 1 # row counter
   col_n = 1 # column counter
   for(i in 1:nrow(ref.open))
   { # plot logo center
     right  = 0.5*col_n - 0.01
     left   = right - 0.2
     bottom = 1-(row_n*(1/3))+0.1
     top    = bottom + 0.2
     par(fig=c(left, right, bottom, top), new=T)
     idx = 380:420
     plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
     # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
     lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=2, col=col[1])
     lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=2, col=col[2])
     # xaxis
     x.at = 1:length(idx)
     axis(1, at=x.at, labels=x.at)
     # yaxis
     x.at = seq(0, 2, by=1)
     axis(2, at=x.at, labels=x.at)
     row_n = row_n + 1
     if(i %% 3 == 0)
     { col_n = col_n + 1
       row_n = 1
     }
   }
 dev.off()
 
 
 ##################################### ctcf with flip and shift  ##################################### 
 # open chromatin
 data       = read.read.models(file.path(data.dir,
                                         "10xgenomics_PBMC_5k_motifs_classification_1", 
                                         "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_model.mat"))
 model.open = data$models
 model.prob = data$prob
 data = NULL
 # nucleosomes
 model.nucl = read.read.models(file.path(data.dir,
                                         "10xgenomics_PBMC_5k_motifs_classification_1",
                                         "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_1nucl_fragment_center_model.mat"))$models
 # sequence
 model.seq  = read.sequence.models(file.path(data.dir,
                                             "10xgenomics_PBMC_5k_motifs_classification_1",
                                             "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_sequences_model.mat"))$models
 
 png(filename=file.path(dest.dir,
                        "ctcf_motifs_6class_shift_flip.png"),
-    units="in", res=720, width=18, height=8)
+    units="in", res=480, width=18, height=8)
 
   m = matrix(1:6, nrow=3, ncol=2, byrow=F)
   layout(m)
   # order from most to least probable class
   ord      = order(model.prob, decreasing=T)
   ref.open = model.open[ord,, drop=F]
   ref.nucl = model.nucl[ord,, drop=F]
   ref.seq  = model.seq[,,ord, drop=F]
   prob     = model.prob[ord]
   class    = c(1:nrow(ref.open))[ord]
   for(i in 1:nrow(ref.open))
   { # plot logo
     plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
               main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
     # x-axis
     x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3)
     x.at  = (x.lab + ncol(ref.open)) / 2
     axis(1, at=x.at, labels=x.lab)
     # y-axis is [0,2] because these are bits but
     # label it [0,1] for min/max signal
     x.at = seq(0, 2, 1)
     axis(2, at=x.at, labels=0.5*x.at)
     # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
     lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=2, col=col[1])
     lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=2, col=col[2])
   }
   row_n = 1 # row counter
   col_n = 1 # column counter
   for(i in 1:nrow(ref.open))
   { # plot logo center
     right  = 0.5*col_n - 0.01
     left   = right - 0.2
     bottom = 1-(row_n*(1/3))+0.1
     top    = bottom + 0.2
     par(fig=c(left, right, bottom, top), new=T)
     idx = 380:420
     plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
     # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
     lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=2, col=col[1])
     lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=2, col=col[2])
     # xaxis
     x.at = 1:length(idx)
     axis(1, at=x.at, labels=x.at)
     # yaxis
     x.at = seq(0, 2, by=1)
     axis(2, at=x.at, labels=x.at)
     row_n = row_n + 1
     if(i %% 3 == 0)
     { col_n = col_n + 1
       row_n = 1
     }
   }
 dev.off()
 
diff --git a/scripts/ch_atac-seq/figure_ctcf_sp1_myc_ebf1_footprint.R b/scripts/ch_atac-seq/figure_ctcf_sp1_myc_ebf1_footprint.R
index a3ad4dd..6d68585 100644
--- a/scripts/ch_atac-seq/figure_ctcf_sp1_myc_ebf1_footprint.R
+++ b/scripts/ch_atac-seq/figure_ctcf_sp1_myc_ebf1_footprint.R
@@ -1,109 +1,109 @@
 setwd(file.path("/", "local", "groux", "scATAC-seq"))
 
 # libraries
 library(RColorBrewer)
 
 # functions
 source(file.path("scripts", "functions.R"))
 
 data.dir = file.path("/", "local", "groux", "scATAC-seq", "data", "10xgenomics_PBMC_5k_motifs")
 dest.dir = file.path("/", "local", "groux", "phd_thesis", "scATAC-seq", "figures")
 
 # CTCF data
 ## open chromatin
 ctcf.open.1.atac  = as.matrix(read.table(file.path(data.dir, "ctcf_motifs_10e-6_open_bin1bp_read_atac.mat")))
 ## nucleosomes
 ctcf.nucl.1.cent  = as.matrix(read.table(file.path(data.dir, "ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat")))
 
 ctcf.open.1.atac = colMeans(ctcf.open.1.atac) / max(colMeans(ctcf.open.1.atac))
 ctcf.nucl.1.cent = colMeans(ctcf.nucl.1.cent) / max(colMeans(ctcf.nucl.1.cent))
 
 
 # SP1 data
 ## open chromatin
 sp1.open.1.atac  = as.matrix(read.table(file.path(data.dir, "sp1_motifs_10e-7_open_bin1bp_read_atac.mat")))
 ## nucleosomes
 sp1.nucl.1.cent  = as.matrix(read.table(file.path(data.dir, "sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center.mat")))
 
 sp1.open.1.atac = colMeans(sp1.open.1.atac) / max(colMeans(sp1.open.1.atac))
 sp1.nucl.1.cent = colMeans(sp1.nucl.1.cent) / max(colMeans(sp1.nucl.1.cent))
 
 
 # myc data
 ## open chromatin
 myc.open.1.atac  = as.matrix(read.table(file.path(data.dir, "myc_motifs_10e-6_open_bin1bp_read_atac.mat")))
 ## nucleosomes
 myc.nucl.1.cent  = as.matrix(read.table(file.path(data.dir, "myc_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat")))
 
 myc.open.1.atac = colMeans(myc.open.1.atac) / max(colMeans(myc.open.1.atac))
 myc.nucl.1.cent = colMeans(myc.nucl.1.cent) / max(colMeans(myc.nucl.1.cent))
 
 
 # EBF1 data
 ## open chromatin
 ebf1.open.1.atac  = as.matrix(read.table(file.path(data.dir, "ebf1_motifs_10e-6_open_bin1bp_read_atac.mat")))
 ## nucleosomes
 ebf1.nucl.1.cent  = as.matrix(read.table(file.path(data.dir, "ebf1_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat")))
 
 ebf1.open.1.atac = colMeans(ebf1.open.1.atac) / max(colMeans(ebf1.open.1.atac))
 ebf1.nucl.1.cent = colMeans(ebf1.nucl.1.cent) / max(colMeans(ebf1.nucl.1.cent))
 
 
 # colors
 col = brewer.pal(4, "Set1")
 
 # display center only
 idx = 200:600 
 
 # x-axis
 axis.at   = seq(-200, 200, length.out=3)  + 200
 axis.lab  = seq(-200, 200, by=200)
 
 y.lim = c(0,1)
 
 
 # X11(width=18, height=9)
 png(filename=file.path(dest.dir, "ctcf_sp1_myc_ebf1_footprint.png"),
-    units="in", res=720, width=18, height=9)
+    units="in", res=480, width=18, height=9)
   m = matrix(nrow=2, ncol=2,
              data=c(1,3,
                     2,4),
              byrow=T)
   l = layout(mat=m, widths=c(1,1), heights=c(1,1))
 
   p = par(mar=c(5.1, 5.1, 4.1, 2.1))
   
   # CTCF
   plot(ctcf.open.1.atac[idx], col=col[1], lwd=3, type='l',
        main="CTCF motif", xlab="pos[bp]", ylab="Prop max. signal", xaxt='n', ylim=y.lim,
        cex.axis=2, cex.lab=2, cex.main=1.8)
   lines(ctcf.nucl.1.cent[idx],  col=col[2], lwd=3)
   axis(side=1, at=axis.at, labels=axis.lab, cex.axis=1.8)
   abline(v=191, lty=2, lwd=3)
   abline(v=210, lty=2, lwd=3)
   # SP1
   plot(sp1.open.1.atac[idx], col=col[1], lwd=3, type='l',
        main="SP1 motif", xlab="pos[bp]", ylab="Prop max. signal", xaxt='n', ylim=y.lim,
        cex.axis=2, cex.lab=2, cex.main=1.8)
   lines(sp1.nucl.1.cent[idx],  col=col[2], lwd=3)
   axis(side=1, at=axis.at, labels=axis.lab, cex.axis=1.8)
   abline(v=194, lty=2, lwd=3)
   abline(v=210, lty=2, lwd=3)
   # myc
   plot(myc.open.1.atac[idx], col=col[1], lwd=3, type='l',
        main="myc motif", xlab="pos[bp]", ylab="Prop max. signal", xaxt='n', ylim=y.lim,
        cex.axis=2, cex.lab=2, cex.main=1.8)
   lines(myc.nucl.1.cent[idx],  col=col[2], lwd=3)
   axis(side=1, at=axis.at, labels=axis.lab, cex.axis=1.8)
   abline(v=195, lty=2, lwd=3)
   abline(v=209, lty=2, lwd=3)
   # EBF1
   plot(ebf1.open.1.atac[idx], col=col[1], lwd=3, type='l',
        main="EBF1 motif", xlab="pos[bp]", ylab="Prop max. signal", xaxt='n', ylim=y.lim,
        cex.axis=2, cex.lab=2, cex.main=1.8)
   lines(ebf1.nucl.1.cent[idx],  col=col[2], lwd=3)
   axis(side=1, at=axis.at, labels=axis.lab, cex.axis=1.8)
   abline(v=197, lty=2, lwd=3)
   abline(v=206, lty=2, lwd=3)
 dev.off()
- 
\ No newline at end of file
+ 
diff --git a/scripts/ch_atac-seq/figure_sp1_6classes.R b/scripts/ch_atac-seq/figure_sp1_6classes.R
index 59e1a7e..32f2a22 100644
--- a/scripts/ch_atac-seq/figure_sp1_6classes.R
+++ b/scripts/ch_atac-seq/figure_sp1_6classes.R
@@ -1,172 +1,172 @@
 
 setwd(file.path("/", "local", "groux", "scATAC-seq"))
 
 # libraries
 library(RColorBrewer)
 
 # functions
 source(file.path("scripts", "functions.R"))
 
 # path to the images for the logo
 path.a = file.path("res/A.png")
 path.c = file.path("res/C.png")
 path.g = file.path("res/G.png")
 path.t = file.path("res/T.png") 
 
 # paths
 data.dir = file.path("/", "local", "groux", "scATAC-seq", "results")
-dest.dir = file.path("/", "local", "groux", "phd_thesis", "scATAC-seq", "figures")
+dest.dir = file.path("/", "local", "groux", "phd_thesis", "images", "ch_atac-seq")
 
 # colors
 col = brewer.pal(3, "Set1")
 
 
 ##################################### sp1 with flip only ##################################### 
 # open chromatin
 data       = read.read.models(file.path(data.dir,
                                         "10xgenomics_PBMC_5k_motifs_classification_0", 
                                         "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_model.mat"))
 model.open = data$models
 model.prob = data$prob
 data = NULL
 # nucleosomes
 model.nucl = read.read.models(file.path(data.dir,
                                         "10xgenomics_PBMC_5k_motifs_classification_0",
                                         "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_nucleosomes_fragment_center_model.mat"))$models
 # sequence
 model.seq  = read.sequence.models(file.path(data.dir,
                                             "10xgenomics_PBMC_5k_motifs_classification_0",
                                             "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_sequences_model.mat"))$models
 
 png(filename=file.path(dest.dir,
                        "sp1_motifs_6class_noshift_flip.png"),
-    units="in", res=720, width=18, height=8)
+    units="in", res=480, width=18, height=8)
 
   m = matrix(1:6, nrow=3, ncol=2, byrow=F)
   layout(m)
   # order from most to least probable class
   ord      = order(model.prob, decreasing=T)
   ref.open = model.open[ord,, drop=F]
   ref.nucl = model.nucl[ord,, drop=F]
   ref.seq  = model.seq[,,ord, drop=F]
   prob     = model.prob[ord]
   class    = c(1:nrow(ref.open))[ord]
   for(i in 1:nrow(ref.open))
   { # plot logo
     plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
               main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
     # x-axis
     x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3)
     x.at  = (x.lab + ncol(ref.open)) / 2
     axis(1, at=x.at, labels=x.lab)
     # y-axis is [0,2] because these are bits but
     # label it [0,1] for min/max signal
     x.at = seq(0, 2, 1)
     axis(2, at=x.at, labels=0.5*x.at)
     # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
     lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=2, col=col[1])
     lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=2, col=col[2])
   }
   row_n = 1 # row counter
   col_n = 1 # column counter
   for(i in 1:nrow(ref.open))
   { # plot logo center
     right  = 0.5*col_n - 0.01
     left   = right - 0.2
     bottom = 1-(row_n*(1/3))+0.1
     top    = bottom + 0.2
     par(fig=c(left, right, bottom, top), new=T)
     idx = 380:420
     plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
     # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
     lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=2, col=col[1])
     lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=2, col=col[2])
     # xaxis
     x.at = 1:length(idx)
     axis(1, at=x.at, labels=x.at)
     # yaxis
     x.at = seq(0, 2, by=1)
     axis(2, at=x.at, labels=x.at)
     row_n = row_n + 1
     if(i %% 3 == 0)
     { col_n = col_n + 1
     row_n = 1
     }
   }
 dev.off()
 
 
 ##################################### sp1 with flip and shift  ##################################### 
 # open chromatin
 data       = read.read.models(file.path(data.dir,
                                         "10xgenomics_PBMC_5k_motifs_classification_1", 
                                         "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_model.mat"))
 model.open = data$models
 model.prob = data$prob
 data = NULL
 # nucleosomes
 model.nucl = read.read.models(file.path(data.dir,
                                         "10xgenomics_PBMC_5k_motifs_classification_1",
                                         "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_1nucl_fragment_center_model.mat"))$models
 # sequence
 model.seq  = read.sequence.models(file.path(data.dir,
                                             "10xgenomics_PBMC_5k_motifs_classification_1",
                                             "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_sequences_model.mat"))$models
 
 png(filename=file.path(dest.dir,
                        "sp1_motifs_6class_shift_flip.png"),
-    units="in", res=720, width=18, height=8)
+    units="in", res=480, width=18, height=8)
 
   m = matrix(1:6, nrow=3, ncol=2, byrow=F)
   layout(m)
   # order from most to least probable class
   ord      = order(model.prob, decreasing=T)
   ref.open = model.open[ord,, drop=F]
   ref.nucl = model.nucl[ord,, drop=F]
   ref.seq  = model.seq[,,ord, drop=F]
   prob     = model.prob[ord]
   class    = c(1:nrow(ref.open))[ord]
   for(i in 1:nrow(ref.open))
   { # plot logo
     plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t,
               main=sprintf("class %d (p=%.2f)", class[i], prob[i]))
     # x-axis
     x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3)
     x.at  = (x.lab + ncol(ref.open)) / 2
     axis(1, at=x.at, labels=x.lab)
     # y-axis is [0,2] because these are bits but
     # label it [0,1] for min/max signal
     x.at = seq(0, 2, 1)
     axis(2, at=x.at, labels=0.5*x.at)
     # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
     lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=2, col=col[1])
     lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=2, col=col[2])
   }
   row_n = 1 # row counter
   col_n = 1 # column counter
   for(i in 1:nrow(ref.open))
   { # plot logo center
     right  = 0.5*col_n - 0.01
     left   = right - 0.2
     bottom = 1-(row_n*(1/3))+0.1
     top    = bottom + 0.2
     par(fig=c(left, right, bottom, top), new=T)
     idx = 380:420
     plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t)
     # plot signal (multiplies by 2 because the y-axis goes to 2 bits)
     lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=2, col=col[1])
     lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=2, col=col[2])
     # xaxis
     x.at = 1:length(idx)
     axis(1, at=x.at, labels=x.at)
     # yaxis
     x.at = seq(0, 2, by=1)
     axis(2, at=x.at, labels=x.at)
     row_n = row_n + 1
     if(i %% 3 == 0)
     { col_n = col_n + 1
     row_n = 1
     }
   }
 dev.off()
 
diff --git a/scripts/ch_spark/figures.R b/scripts/ch_spark/figures.R
new file mode 100644
index 0000000..ba789f4
--- /dev/null
+++ b/scripts/ch_spark/figures.R
@@ -0,0 +1,1073 @@
+# REDO THE FIGURES FROM THE ARTICLE BUT IN PNG FORMAT INSTEAD OF PDF, WITH LOWER RESOLUTION 
+# IT IS AN ADAPTED COPY/PASTE FROM /local/groux/Kmeans_chipseq/bin/article/figures.R
+
+setwd(file.path("", "local", "groux", "Kmeans_chipseq"))
+
+library(RColorBrewer)
+library(plotrix)
+
+
+
+
+# ===================================================== Supplemental Figure 1 =====================================================
+# plot the class profiles of the simulated classes and one example of a dataset with low noise, one example with high noise 
+# and their two best partitions
+
+dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark")
+
+source(file.path("res", "functions_utility.R"))
+source(file.path("res", "functions_plot.R"))
+
+# class densities: rebuild the three signal shapes that are plotted in panel A
+# general parameter
+n_samples  = 1000
+n_col      = 2001                   # the length of a signal vector
+shift_max  = 100                    # the maximum possible shift
+p_flip     = 0.3                    # the prob of having a flipped signal    
+# class 1  : a simple gaussian
+class1_n   = 600
+class1_m   = ceiling(n_col/2) - ceiling(shift_max/2) # class 1 mean, mean will be in average in the middle of the data vector
+class1_s   = 40                                      # class 1 sd
+# the signal shape, evaluated on n_col-shift_max+1 positions
+shape1     = dnorm(1:(n_col-shift_max+1), class1_m, class1_s)
+# class 2  : half a gaussian
+class2_n   = n_samples - class1_n
+class2_m   = floor(n_col/2) - floor(shift_max/2) # class 2 mean, mean will be in average in the middle of the data vector
+class2_s   = 40                                  # class 2 sd
+# the signal shape : a gaussian whose right half (from the mean on) is flattened to the minimum
+shape2      = dnorm(1:(n_col-shift_max+1), class2_m, class2_s)
+shape2[class2_m:length(shape2)] = min(shape2)
+# class 3  : a uniform
+class3_n    = 333
+class3_from = floor(n_col/2) - floor(shift_max/2) -120 # class 3 from, mean will be in average in the middle of the data vector
+class3_to   = floor(n_col/2) - floor(shift_max/2) +120 # class 3 to, mean will be in average in the middle of the data vector
+# the signal shape
+shape3      = dunif(1:(n_col-shift_max+1), class3_from, class3_to)
+# normalize each shape so that it sums to 1
+shape1 = shape1 / sum(shape1)
+shape2 = shape2 / sum(shape2)
+shape3 = shape3 / sum(shape3) # was a second shape2 normalization, which left shape3 unnormalized
+
+# two datasets and an example of partitioning using SPar-K; labels = 1st column of the class file
+labels       = as.matrix(read.table("data/simulated_data_chipseq/simulated_data_3_class_asym_classes_cov100_noise0.0.txt"))[,1]
+# coverage 100, noise 0
+data.100.0      = as.matrix(read.table(file.path("data", "simulated_data_chipseq", "simulated_data_3_class_asym_cov100_noise0.0.txt")))
+ari             = read.RDS(file.path("results", "simulated_data_chipseq", "app", "simulated_data_chipseq_3_class_asym_ari_newkmean.RDS"))
+best            = which.max(ari$`kmean++`$nooutlier$`cov 100`$`noise 0.0`$`3 cluster`) # index of the run with the highest ARI
+data.100.0.part = read.table(file.path("results",
+                                       "simulated_data_chipseq",
+                                       "app",
+                                       "seeding_kmean++",
+                                       sprintf("simulated_data_3_class_asym_cov100_noise0.0_3cluster_flip_normcorr_%d.txt", best)),
+                             header=T)
+data.100.0.part = realign.data(data.100.0, data.100.0.part$shift_ref, data.100.0.part$shift_dat, data.100.0.part$flip, 71) # NOTE(review): 71 is presumably the shift setting of the run - confirm
+
+# coverage 100, noise 0.9 (90%)
+data.100.9      = as.matrix(read.table(file.path("data", "simulated_data_chipseq", "simulated_data_3_class_asym_cov100_noise0.9.txt")))
+best            = which.max(ari$`kmean++`$nooutlier$`cov 100`$`noise 0.9`$`3 cluster`) # index of the run with the highest ARI
+data.100.9.part = read.table(file.path("results",
+                                       "simulated_data_chipseq",
+                                       "app",
+                                       "seeding_kmean++",
+                                       sprintf("simulated_data_3_class_asym_cov100_noise0.9_3cluster_flip_normcorr_%d.txt", best)),
+                             header=T)
+data.100.9.part = realign.data(data.100.9, data.100.9.part$shift_ref, data.100.9.part$shift_dat, data.100.9.part$flip, 71) # NOTE(review): same 71 as above - confirm
+
+
+col      = brewer.pal(3, "Set1")
+col.heat = colorRampPalette(c("white", "red"),  space = "rgb")(100)
+col.lab  = c(rep(col[1], table(labels)[1]), # one colour per row; assumes the rows are sorted by class - TODO confirm
+             rep(col[2], table(labels)[2]),
+             rep(col[3], table(labels)[3]))  
+
+x.lab     = seq(-1000, 1000, length.out=5)       # tick labels, in bp
+x.at      = seq(0, 1, length.out=length(x.lab))  # tick positions; image() draws the matrix on [0,1] by default
+
+# pdf(file=file.path("results", "article", "supplemental_figure1.pdf"), width=14, height=7)
+png(filename=file.path(dest.dir, "supplemental_figure1.png"),
+    width=14, height=7, units="in", res=300)
+  par(mar=c(5.1, 6.1, 4.1, 2.1))
+
+  lay = layout(mat=matrix(c(1,4,5, 8, 9,
+                            1,4,5, 8, 9,
+                            2,4,5, 8, 9,
+                            2,6,7,10,11,
+                            3,6,7,10,11,
+                            3,6,7,10,11), nrow=6, ncol=5, byrow=T),widths=c(5,0.5,5,0.5,5))
+  # layout.show(lay) ; panels 1-3 : densities, narrow columns (4,6,8,10) : label bars, 5,7,9,11 : heatmaps
+  
+  # class 1 density (panel 1, carries the 'A' label)
+  x = 1:length(shape1)
+  plot(x, shape1, lwd=3, type='l', col=col[1], main="Class 1 density", xlab="position [bp]", ylab="density", 
+       cex.main=2, cex.axis=2, cex.lab=2)
+  text(x=-200, y=0.0125, labels='A', cex=4.5, xpd=NA, font=2)
+  # class 2 density (panel 2)
+  plot(x, shape2, lwd=3, type='l', col=col[2], main="Class 2 density", xlab="position [bp]", ylab="density", 
+       cex.main=2, cex.axis=2, cex.lab=2)
+  # class 3 density (panel 3)
+  plot(x, shape3, lwd=3, type='l', col=col[3], main="Class 3 density", xlab="position [bp]", ylab="density", 
+       cex.main=2, cex.axis=2, cex.lab=2)
+
+  # dataset coverage 100 noise 0 (panels 4-5, top middle)
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(labels, lwd=2, colors=col.lab)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(data.100.0)), col=col.heat, xaxt='n', yaxt='n', main="Coverage 100, noise 0%", ylab="", xlab="position (bp)",
+        cex.main=2.0, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.07, y=1.1, labels='B', cex=4.5, xpd=NA, font=2)
+  
+  # dataset coverage 100 noise 0.9 (panels 6-7, bottom middle, hence the 'D' label)
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(labels, lwd=2, colors=col.lab)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(data.100.9)), col=col.heat, xaxt='n', yaxt='n', main="Coverage 100, noise 90%", ylab="", xlab="position (bp)",
+        cex.main=2.0, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.07, y=1.1, labels='D', cex=4.5, xpd=NA, font=2)
+  
+  # partition of dataset coverage 100 noise 0 (panels 8-9, top right, hence the 'C' label)
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(labels, lwd=2, colors=col.lab)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(data.100.0.part)), col=col.heat, xaxt='n', yaxt='n', main="Coverage 100, noise 0%", ylab="", xlab="Approximated pos. (bp)",
+        cex.main=2.0, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.07, y=1.1, labels='C', cex=4.5, xpd=NA, font=2)
+  
+  # partition dataset coverage 100 noise 0.9 (panels 10-11, bottom right)
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(labels, lwd=2, colors=col.lab)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(data.100.9.part)), col=col.heat, xaxt='n', yaxt='n', main="Coverage 100, noise 90%", ylab="", xlab="Approximated pos. (bp)",
+        cex.main=2.0, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.07, y=1.1, labels='E', cex=4.5, xpd=NA, font=2)
+dev.off()  
+
+rm(list=ls())
+  
+  
+
+
+# ===================================================== Supplemental Figure 2 =====================================================
+# plot the Adjusted Rand Index values for all programs, measured on the simulated data with different coverages, background to 
+# noise ratios and containing 3 classes.
+# supplemental figure 1 : results when clustering the data with random  seeding
+# supplemental figure 2 : results when clustering the data with kmean++ seeding
+
+dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark")
+source(file.path("res", "functions_utility.R"))
+
+
+
+# some colors
+colors = brewer.pal(9, "Set1")
+
+# coverages and noise proportions used to simulate the data
+coverages = c(10, 50, 100)
+noises    = c(0.0, 0.1, 0.5, 0.9)
+
+# load the measured Adjusted Rand Index values (nested lists, indexed by name in the boxplot call)
+ari.kmean.new = read.RDS(file.path("results", 
+                                   "simulated_data_chipseq", 
+                                   "app",              
+                                   "simulated_data_chipseq_3_class_asym_ari_newkmean.RDS"))
+ari.kmean.reg = read.RDS(file.path("results", "simulated_data_chipseq", 
+                                   "kmean",            
+                                   "simulated_data_chipseq_3_class_asym_ari_kmean.RDS"))
+ari.chippart  = read.RDS(file.path("results",
+                                   "simulated_data_chipseq", 
+                                   "chippartitioning", 
+                                   "simulated_data_chipseq_3_class_asym_ari_chippartitioning.RDS"))
+ari.shuf      = read.RDS(file.path("results",
+                                   "simulated_data_chipseq",
+                                   "simulated_data_chipseq_3_class_asym_gamma_shuffled.RDS"))
+
+# pdf(file=file.path("results", "article", "supplemental_figure2.pdf"), width=16, height=7)
+png(filename=file.path(dest.dir, "supplemental_figure2.png"),
+     width=16, height=7, units="in", res=300)
+  par(mar=c(5.1, 6.1, 4.1, 2.1))
+  
+  colors.boxplot = c(rep(c(colors[1], # 5 groups of 12 boxes (3 coverages x 4 noises), one colour per group
+                           colors[5],
+                           colors[2:4]),
+                         each=12),
+                     colors[7])       # last box : random expectation
+  boxplot(
+    # new K-means
+    # random seeding
+    # normal
+    # coverage 10
+    ari.kmean.new[["random"]][["normal"]][["cov 10"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["normal"]][["cov 10"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["normal"]][["cov 10"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["normal"]][["cov 10"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 50
+    ari.kmean.new[["random"]][["normal"]][["cov 50"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["normal"]][["cov 50"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["normal"]][["cov 50"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["normal"]][["cov 50"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 100
+    ari.kmean.new[["random"]][["normal"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["normal"]][["cov 100"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["normal"]][["cov 100"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["normal"]][["cov 100"]][["noise 0.9"]][["3 cluster"]],
+    
+    # new K-means
+    # random seeding
+    # nooutlier (second legend entry)
+    # coverage 10
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 10"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 10"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 10"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 10"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 50
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 50"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 50"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 50"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 50"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 100
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.9"]][["3 cluster"]],
+    
+    # regular K-means
+    # random seeding
+    # euclidean distance
+    # (list key "eucl")
+    # coverage 10
+    ari.kmean.reg[["random"]][["eucl"]][["cov 10"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["eucl"]][["cov 10"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["eucl"]][["cov 10"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["eucl"]][["cov 10"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 50
+    ari.kmean.reg[["random"]][["eucl"]][["cov 50"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["eucl"]][["cov 50"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["eucl"]][["cov 50"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["eucl"]][["cov 50"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 100
+    ari.kmean.reg[["random"]][["eucl"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["eucl"]][["cov 100"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["eucl"]][["cov 100"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["eucl"]][["cov 100"]][["noise 0.9"]][["3 cluster"]],
+    
+    # regular K-means
+    # random seeding
+    # correlation distance
+    # (list key "corr")
+    # coverage 10
+    ari.kmean.reg[["random"]][["corr"]][["cov 10"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["corr"]][["cov 10"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["corr"]][["cov 10"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["corr"]][["cov 10"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 50
+    ari.kmean.reg[["random"]][["corr"]][["cov 50"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["corr"]][["cov 50"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["corr"]][["cov 50"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["corr"]][["cov 50"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 100
+    ari.kmean.reg[["random"]][["corr"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["corr"]][["cov 100"]][["noise 0.1"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["corr"]][["cov 100"]][["noise 0.5"]][["3 cluster"]],
+    ari.kmean.reg[["random"]][["corr"]][["cov 100"]][["noise 0.9"]][["3 cluster"]],
+    
+    # ChIPPartitioning
+    # random seeding
+    # coverage 10
+    ari.chippart[["random"]][["cov 10"]][["noise 0.0"]][["3 cluster"]],
+    ari.chippart[["random"]][["cov 10"]][["noise 0.1"]][["3 cluster"]],
+    ari.chippart[["random"]][["cov 10"]][["noise 0.5"]][["3 cluster"]],
+    ari.chippart[["random"]][["cov 10"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 50
+    ari.chippart[["random"]][["cov 50"]][["noise 0.0"]][["3 cluster"]],
+    ari.chippart[["random"]][["cov 50"]][["noise 0.1"]][["3 cluster"]],
+    ari.chippart[["random"]][["cov 50"]][["noise 0.5"]][["3 cluster"]],
+    ari.chippart[["random"]][["cov 50"]][["noise 0.9"]][["3 cluster"]],
+    # coverage 100
+    ari.chippart[["random"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+    ari.chippart[["random"]][["cov 100"]][["noise 0.1"]][["3 cluster"]],
+    ari.chippart[["random"]][["cov 100"]][["noise 0.5"]][["3 cluster"]],
+    ari.chippart[["random"]][["cov 100"]][["noise 0.9"]][["3 cluster"]],
+    
+    # Random expectations
+    ari.shuf,
+    
+    main="Adjusted Rand Index", xlab="", ylab="ARI",
+    xaxt='n', yaxt='n', cex.main=3, cex.axis=2, cex.lab=2, ylim=c(-0.2, 1.4),
+    col=colors.boxplot)
+  # y axis, drawn by hand because the boxplot call uses yaxt='n'
+  axis(side=2, at=seq(0, 1, by=0.2), cex.axis=1.3)
+  # add horizontal lines
+  abline(h=1.0, lty=2)
+  abline(h=0.5, lty=2)
+  abline(h=0.0, lty=2)
+  # draw noise values
+  # parameters to draw triangles (one triangle spans the 4 noise values of a coverage)
+  y_from_tri = -0.05
+  y_to_tri   = y_from_tri
+  x_from_tri = 0.5
+  x_to_tri   = 1
+  x_by_tri   = length(noises) + 1
+  h_tri      = 0.02
+  # parameters to draw noise values
+  x_noise    = 1
+  y_noise    = y_from_tri - 0.05
+  x_by_noise = 1
+  for(i in 1:5) # one pass per group of 12 boxes
+  { for(j in 1:length(coverages))
+    { x_to_tri = x_from_tri + x_by_tri - 1
+      polygon(x=c(x_from_tri, x_to_tri, x_to_tri), y=c(y_from_tri, y_to_tri, y_to_tri+h_tri), col="black")
+      for(k in 1:length(noises))
+      {
+        text(x=x_noise, y=y_noise, labels=noises[k], cex=0.8)
+        x_noise = x_noise + x_by_noise
+      }
+      abline(v=x_from_tri, lty=2)
+      x_from_tri = x_to_tri
+    }
+  }
+  abline(v=x_from_tri, lty=2)
+  # label the random values (x_noise now points at the last box)
+  text(x=x_noise, 
+       y=y_noise, 
+       labels="R")
+  # draw coverage values (one segment and one "cov N" label under each set of 4 boxes)
+  y_cov      = y_from_tri - 0.1
+  y_cov_text = y_cov - 0.05
+  x_from_cov = 1
+  x_to_cov   = 1
+  x_by_cov   = length(noises)
+  for(i in 1:5) # one pass per group of 12 boxes
+  { for(j in 1:length(coverages))
+    { x_to_cov = x_from_cov + x_by_cov - 1
+    segments(x0=x_from_cov, x1=x_to_cov, y0=y_cov, y1=y_cov, lwd=3)
+    text(x=x_from_cov + 0.5*(x_by_cov-1), y=y_cov_text, labels=sprintf("cov %d", coverages[j]))
+    x_from_cov = x_to_cov + 1
+    }
+  }
+  # draw legend (entries follow the order of the colour groups in colors.boxplot)
+  legend(x=50, y=1.52, legend=c("SPar-K",
+                                "SPar-K (smooth.)",
+                                "K-means (eucl.)",
+                                "K-means (corr.)",
+                                "ChIPPartitioning",
+                                "Random partition"),
+         col=unique(colors.boxplot),
+         cex=1.2, lwd=4, bty='n')
+dev.off()
+
+
+rm(list=ls())
+
+
+
+# ===================================================== Supplemental Figure 4 =====================================================
+# plot the SSE for random and Kmeans++ seedings for simulated ChIP-seq data with 3 classes
+
+dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark")
+source(file.path("res", "functions_utility.R"))
+
+sse = read.RDS(file.path("results",
+                         "simulated_data_chipseq",
+                         "app",
+                         "simulated_data_chipseq_3_class_asym_sse_newkmean.RDS"))
+cov      = "cov 100"   # list keys selecting the dataset shown in all four panels
+noise    = "noise 0.0"
+
+# pdf(file=file.path("results", "article", "supplemental_figure4.pdf"), width=10, height=6)
+png(filename=file.path(dest.dir, "supplemental_figure4.png"),
+     width=10, height=6, units="in", res=300)
+
+  par(mar=c(5.1, 6.1, 4.1, 2.1), mfrow=c(2,2))
+  # panel A : random seeding, normal
+  option  = "normal"
+  seeding = "random"
+  x = 2:5
+  m = c(
+    # median SSE for 2 to 5 clusters
+    # (dataset selected by cov and noise)
+    median(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]])
+  )
+  s = c(
+    # standard deviation of the SSE for 2 to 5 clusters
+    # (dataset selected by cov and noise)
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]])
+  )
+  ylim = c(min(m-s), max(m+s))
+  # plot medians
+  plot(x=x, y=m,
+       main="", xlab="Nb of clusters", ylab="SSE",
+       cex.main=3, cex.axis=2, cex.lab=2, lwd=3, type='b', ylim=ylim,
+       xaxt='n')
+  axis(side=1, at=x, cex.axis=2)
+  # plot standard deviations as vertical bars (median +/- sd)
+  segments(x0=x, x1=x, y0=m-s, y1=m+s, lwd=2)
+  # panel label, placed above the plot region (xpd=NA)
+  text(x=1.4, y=ylim[1]+ 1.3*diff(ylim), labels="A", cex=3.5, xpd=NA, font=2)
+  
+  
+  # panel B : random seeding, nooutlier
+  option  = "nooutlier"
+  seeding = "random"
+  x       = 2:5
+  m = c(
+    # median SSE for 2 to 5 clusters
+    # (dataset selected by cov and noise)
+    median(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]])
+  )
+  s = c(
+    # standard deviation of the SSE for 2 to 5 clusters
+    # (dataset selected by cov and noise)
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]])
+  )
+  ylim = c(min(m-s), max(m+s))
+  # plot medians
+  plot(x=x, y=m,
+       main="", xlab="Nb of clusters", ylab="SSE",
+       cex.main=3, cex.axis=2, cex.lab=2, lwd=3, type='b', ylim=ylim,
+       xaxt='n')
+  axis(side=1, at=x, cex.axis=2)
+  # plot standard deviations as vertical bars (median +/- sd)
+  segments(x0=x, x1=x, y0=m-s, y1=m+s, lwd=2)
+  # panel label, placed above the plot region (xpd=NA)
+  text(x=1.4, y=ylim[1]+ 1.3*diff(ylim), labels="B", cex=3.5, xpd=NA, font=2)
+  
+  
+  # panel C : kmean++ seeding, normal
+  option  = "normal"
+  seeding = "kmean++"
+  x = 2:5
+  m = c(
+    # median SSE for 2 to 5 clusters
+    # (dataset selected by cov and noise)
+    median(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]])
+  )
+  s = c(
+    # standard deviation of the SSE for 2 to 5 clusters
+    # (dataset selected by cov and noise)
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]])
+  )
+  ylim = c(min(m-s), max(m+s))
+  # plot medians
+  plot(x=x, y=m,
+       main="", xlab="Nb of clusters", ylab="SSE",
+       cex.main=3, cex.axis=2, cex.lab=2, lwd=3, type='b', ylim=ylim,
+       xaxt='n')
+  axis(side=1, at=x, cex.axis=2)
+  # plot standard deviations as vertical bars (median +/- sd)
+  segments(x0=x, x1=x, y0=m-s, y1=m+s, lwd=2)
+  # panel label, placed above the plot region (xpd=NA)
+  text(x=1.4, y=ylim[1]+ 1.3*diff(ylim), labels="C", cex=3.5, xpd=NA, font=2)
+
+  
+  # panel D : kmean++ seeding, nooutlier
+  option  = "nooutlier"
+  seeding = "kmean++"
+  x = 2:5
+  m = c(
+    # median SSE for 2 to 5 clusters
+    # (dataset selected by cov and noise)
+    median(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]),
+    median(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]])
+  )
+  s = c(
+    # standard deviation of the SSE for 2 to 5 clusters
+    # (dataset selected by cov and noise)
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]),
+    sd(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]])
+  )
+  ylim = c(min(m-s), max(m+s))
+  # plot medians
+  plot(x=x, y=m,
+       main="", xlab="Nb of clusters", ylab="SSE",
+       cex.main=3, cex.axis=2, cex.lab=2, lwd=3, type='b', ylim=ylim,
+       xaxt='n')
+  axis(side=1, at=x, cex.axis=2)
+  # plot standard deviations as vertical bars (median +/- sd)
+  segments(x0=x, x1=x, y0=m-s, y1=m+s, lwd=2)
+  # panel label, placed above the plot region (xpd=NA)
+  text(x=1.4, y=ylim[1]+ 1.3*diff(ylim), labels="D", cex=3.5, xpd=NA, font=2)
+  
+dev.off()
+
+
+rm(list=ls())
+
+
+
+# ===================================================== Supplemental Figure 5 =====================================================
+# plot the runtimes for each program when clustering the simulated ChIP-seq data with 3 classes
+
+dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark")
+source(file.path("res", "functions_utility.R"))
+
+times.new   = read.RDS(file.path("results", "runtime", "runtimes_app.RDS"))
+times.kmean = read.RDS(file.path("results", "runtime", "runtimes_kmean.RDS"))
+times.chipp = read.RDS(file.path("results", "runtime", "runtimes_chippartitioning.RDS"))
+
+data = list(vec1=times.new[["random"]][["normal"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+            vec2=times.new[["kmean++"]][["normal"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+            vec3=times.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+            vec4=times.new[["kmean++"]][["nooutlier"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+            vec5=times.kmean[["random"]][["eucl"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+            vec6=times.kmean[["random"]][["corr"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+            vec7=times.kmean[["kmean++"]][["eucl"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+            vec8=times.kmean[["kmean++"]][["corr"]][["cov 100"]][["noise 0.0"]][["3 cluster"]],
+            vec9=times.chipp[["random"]][["cov 100"]][["noise 0.0"]][["3 cluster"]])
+
+# some colors, two per program ; NOTE(review): this gives 10 colours for the 9 boxes in data - confirm this is intended
+colors = brewer.pal(9, "Set1") 
+colors.boxplot = c(rep(colors[1],2),
+                   rep(colors[5],2),
+                   rep(colors[2],2),
+                   rep(colors[3],2),
+                   rep(colors[4],2))
+
+# pdf(file=file.path("results", "article", "supplemental_figure5.pdf"), width=10, height=6)
+png(filename=file.path(dest.dir, "supplemental_figure5.png"),
+     width=10, height=6, units="in", res=300)
+  par(mar=c(6.1, 6.1, 4.1, 2.1),
+      cex.main=3,
+      cex.axis=1.5,
+      cex.lab=2,
+      xaxt="n")
+  
+  p = par(cex.main=3, cex.axis=1.5, cex.lab=2) # repeats values already set above ; p is not used in the lines below
+  
+  # boxplot with a broken y-axis (gap between 80 and 550)
+  gap.boxplot(data,
+              gap=list(top=c(80,550),bottom=c(NA,NA)),
+              main="Running times", xlab="", ylab="time (sec)", 
+              col=colors.boxplot)
+  # x-axis : seeding of each box, written as rotated text below the axis
+  labels = c(rep(c("rand", "k++"), 4),
+             "rand")
+  axis(1, at=1:9, tick=T, labels=FALSE)
+  text(x=1:9,
+       y=-10,
+       labels=labels,
+       srt=45, adj=1, xpd=TRUE, cex=1.8)
+  # y-axis : labels above the gap are drawn at their value shifted down by (550-60)
+  axis(2, labels=c(seq(0,80,length.out=5), seq(550,650,length.out=5)), 
+       at=c(seq(0,80,length.out=5), seq(550,650,length.out=5)-(550-60)))
+  # legend (one entry per colour in colors.boxplot)
+  legend("topleft",
+         legend = c("SPar-K",
+                    "SPar-K (smooth)",
+                    "Kmeans (eucl)",
+                    "Kmeans (corr)",
+                    "ChiPPartitioning"),
+         col = unique(colors.boxplot),
+         cex=1.2, lwd=4, bty='n')
+  grid()
+dev.off()
+
+rm(list=ls())
+
+
+# ===================================================== Supplemental Figure 8 =====================================================
+# figure with the MNase data at CTCF binding sites partition
+# the partition was obtained by running the clustering with a shift of 41, flip and nooutlier
+# the optimal number of clusters was estimated to be 3, the best partition was estimated to be 
+# the 4th one. Visually, it was providing interesting biological information but was not 
+# the partition with the lowest SSE for K=3 (nor was it the one with the highest).
+
+dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark")
+source(file.path("res", "functions_utility.R"))
+source(file.path("res", "functions_plot.R"))
+
+#' Order the rows of a given matrix by similarity 
+#' (correlation) to the aggregation, i.e. the column 
+#' sums, in ascending order, and returns the order.
+#' @param data the matrix of interest.
+#' @return a vector of indices to reorder the original 
+#' matrix; c(1) if data is a vector (a single row).
+#' @author Romain Groux
+get.row.order = function(data)
+{ if(is.vector(data))
+  { return(c(1)) }
+  else
+  { ref    = colSums(data)
+    scores = apply(data, 1, cor, ref)
+    return(order(scores, decreasing=FALSE))
+  }
+}
+
+
+# clustering parameters
+n.cluster  = 4 # NOTE(review): the section header mentions K=3 - confirm which value is meant
+n.shift    = 41
+flip       = TRUE
+
+
+# the data
+data  = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_mnase_encode.txt")))
+# some additional data (dnase is the sum of the two replicate matrices)
+dnase     = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_dnase_encode_rep1.txt"))) + 
+            as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_dnase_encode_rep2.txt")))
+motif     = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_ctcfmotif_encode.txt")))
+tss.plus  = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_tss_std+_encode.txt")))
+tss.minus = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_tss_std-_encode.txt")))
+tss       = tss.plus + tss.minus
+peaks     = read.table(file.path("data", "data_chipseq", "ctcfpeak.sga"), header=F, stringsAsFactors=F)
+
+# cluster 1 aggregation profiles
+chipcor.tss.m  = read.table(file.path("results",
+                                      "ctcf_mnase_encode2",
+                                      sprintf("ctcfpeak_mnase_%dclusters_newkmean_nooutlier_4_cluster1_tss-.txt",
+                                              n.cluster)))
+chipcor.cage.m = read.table(file.path("results",
+                                      "ctcf_mnase_encode2",
+                                      sprintf("ctcfpeak_mnase_%dclusters_newkmean_nooutlier_4_cluster1_cage-rep1-.txt",
+                                              n.cluster)))
+chipcor.dnase = read.table(file.path("results",
+                                     "ctcf_mnase_encode2",
+                                     sprintf("ctcfpeak_mnase_%dclusters_newkmean_nooutlier_4_cluster1_dnase-rep1.txt",
+                                             n.cluster)))
+chipcor.mnase = read.table(file.path("results",
+                                     "ctcf_mnase_encode2",
+                                     sprintf("ctcfpeak_mnase_%dclusters_newkmean_nooutlier_4_cluster1_mnase.txt",
+                                             n.cluster)))
+
+# the best partition
+results   = read.table(file.path("results", "ctcf_mnase_encode2", 
+                                 sprintf("ctcf_mnase_encode_%dclusters_nooutlier_4.txt", n.cluster)),
+                       header=T)
+results   = format.results(data, results, n.shift, n.cluster)
+
+
+# x-axis labels
+x.lab     = seq(-1000, 1000, length.out=5)
+x.at      = seq(0, 1, length.out=length(x.lab))
+x.at2     = seq(1, ncol(data), length.out=length(x.lab))
+# heatmap colors
+color.1 = colorRampPalette(c("white",  "red"),  space = "rgb")(100)
+color.2 = colorRampPalette(c("white", "blue"),  space = "rgb")(100)
+# cluster colors
+color.lab = brewer.pal(8, "Set1")
+# whether a region has a motif (row sums above 1 are capped at 1)
+has_motif = apply(motif, 1, sum)
+has_motif[which(has_motif > 1)] = 1
+# whether a region has a TSS (row sums above 1 are capped at 1)
+has_tss = apply(tss, 1, sum)
+has_tss[which(has_tss > 1)] = 1
+
+# plot
+# pdf(file=file.path("results", "article", "supplemental_figure8.pdf"), width=14, height=8)
+png(filename=file.path(dest.dir, "supplemental_figure8.png"),
+     width=14, height=8, units="in", res=300)
+  
+  # create matrices with the data and the peaks for the heatmap and a vector of color 
+  # labels to plot the cluster assignment on the side of the heatmap
+  d             = matrix(nrow=nrow(data), ncol=ncol(data))
+  p             = d
+  data.aligned  = d
+  motif.aligned = d
+  dnase.aligned = d
+  tss.aligned   = d
+  l             = vector(mode="character", length=nrow(data))
+  from = 1; to = from ;
+  for(j in 1:n.cluster)
+  { index                   = which(results$clusters == j)
+    to                      = from + length(index) -1
+    d[from:to,]             = order.rows(data[index,])
+    data.aligned[from:to,]  = realign.data(data[index,],  results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift)
+    motif.aligned[from:to,] = realign.data(motif[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift)
+    dnase.aligned[from:to,] = realign.data(dnase[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift)
+    tss.aligned[from:to,]   = realign.data(tss[index,],   results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift)
+    order                   = get.row.order(data.aligned[from:to,])
+    data.aligned[from:to,]  = data.aligned[from:to,][order,]
+    motif.aligned[from:to,] = motif.aligned[from:to,][order,]
+    dnase.aligned[from:to,] = dnase.aligned[from:to,][order,]
+    tss.aligned[from:to,]   = tss.aligned[from:to,][order,]
+    l[from:to]  = color.lab[j]
+    from = to + 1
+  }
+  
+  
+  p = par(oma=c(0,0,5,0))
+  # layout construction
+  labels = c(1, 2, 3, 4, 5, 6, 7, 8,
+             1, 2, 3, 4, 5, 6, 7, 8,
+             9, 10,11,11,12,12,13,13,
+             9, 10,11,11,12,12,13,13)
+  lay = layout(matrix(data=labels, nrow=4, ncol=8, byrow=T), widths=c(0.5,5,0.5,5,0.5,5,0.5,5,0.5,5,0.5,5))
+  # layout.show(lay)
+  
+  # data heatmap
+  # p = par(mar=c(5, 0, 4, 1) + 0.1)
+  # plot.label.bar(results$clusters, lwd=2, colors=l)
+  # p = par(mar=c(5, 10, 4, 4) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  # image(t(condense.matrix(d)), col=color.1, xaxt='n', yaxt='n', main="MNase Data", ylab="", xlab="Position (bp)",
+  #       cex.main=2.5, cex.lab=2.5)
+  # axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  # text(x=-0.08, y=1.15, labels='A', cex=4, xpd=NA, font=2)
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot(1,1, xaxt='n', yaxt='n', col="white", xlab="", ylab="", main="", bty='n')
+  p = par(mar=c(5, 10, 4, 4) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(order.rows(data))), col=color.1, xaxt='n', yaxt='n', main="MNase Data", ylab="", xlab="Position (bp)",
+        cex.main=2.5, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.08, y=1.15, labels='A', cex=4, xpd=NA, font=2)
+  # realigned data heatmap
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(results$clusters, lwd=2, colors=l)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(data.aligned)), col=color.1, xaxt='n', yaxt='n', main="Aligned MNase", ylab="", xlab="Approx. pos. (bp)",
+        cex.main=2.5, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.08, y=1.15, labels='B', cex=4, xpd=NA, font=2)
+  # realigned DNaseI
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(results$clusters, lwd=2, colors=l)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(dnase.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned DNaseI", ylab="", xlab="Approx. pos. (bp)",
+        cex.main=2.5, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.08, y=1.15, labels='C', cex=4, xpd=NA, font=2)
+  # realigned motifs
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(results$clusters, lwd=2, colors=l)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(motif.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned motifs", ylab="", xlab="Approx. pos. (bp)",
+        cex.main=2.5, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.08, y=1.15, labels='D', cex=4, xpd=NA, font=2)
+  # realigned TSSs
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(results$clusters, lwd=2, colors=l)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(tss.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned TSSs", ylab="", xlab="Approx. pos. (bp)",
+        cex.main=2.5, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.08, y=1.15, labels='E', cex=4, xpd=NA, font=2)
+  
+  # cluster 2 aggregations
+  x        = chipcor.dnase[,1]
+  y.dnase  = chipcor.dnase[,2]
+  y.mnase  = chipcor.mnase[,2]
+  y.tss.m  = chipcor.tss.m[,2]
+  y.cage.m = chipcor.cage.m[,2]
+  p = par(mar=c(5.1,6.1,4.1,2.1))
+  plot(x=x,  y=y.mnase/max(y.mnase), lwd=3, col=color.lab[2], type='l',
+       xlab="Approximated position (bp)",
+       ylab="Prop. of max signal",
+       main="Cluster 1",
+       ylim=c(0,1.2),
+       cex.main=2.5, cex.axis=2, cex.lab=2)
+  lines(x=x, y=y.dnase/max(y.dnase),   lwd=3, col=color.lab[1], lty=1) # dnase on both std / at orinted peaks
+  lines(x=x, y=y.tss.m/max(y.tss.m),   lwd=2, col=color.lab[3], lty=1) # tss   on    - std / at orinted peaks
+  lines(x=x, y=y.cage.m/max(y.cage.m), lwd=2, col=color.lab[4], lty=1) # cage  on    - std / at orinted peaks
+  legend("topright", legend=c("MNase",
+                              "DNaseI",
+                              "TSS -std",
+                              "CAGE -std"),
+         seg.len=0.5, col=c(color.lab[c(2,1,3,4)]), lwd=c(3,3,2,2), bty="n", cex=1)
+  text(x=-1100, y=1.42, labels='F', cex=4.5, xpd=NA, font=2)
+  
+  # motif proportions
+  motif_prop = vector(mode="numeric", length=n.cluster)
+  for(j in 1:n.cluster)
+  { index        = which(results$clusters == j) 
+    motif_prop[j] = sum(has_motif[index]) / length(index)
+  }
+  barplot(height=motif_prop, ylim=c(0,1),col=color.lab[1:j], 
+          main="Prop. CTCF motif", xlab="clusters", ylab="Prop. region with motif",
+          names.arg=1:n.cluster,
+          cex.main=2.0, cex.lab=2, cex.axis=2)
+  text(x=-0.08, y=1.15, labels='G', cex=4, xpd=NA, font=2)
+  
+  # TSS proportions
+  tss_prop = vector(mode="numeric", length=n.cluster)
+  for(j in 1:n.cluster)
+  { index        = which(results$clusters == j) 
+    tss_prop[j] = sum(has_tss[index]) / length(index)
+  }
+  barplot(height=tss_prop, ylim=c(0,1),col=color.lab[1:j],
+          main="Prop. TSS", xlab="clusters", ylab="Prop. region with TSS",
+          names.arg=1:n.cluster,
+          cex.main=2.0, cex.lab=2, cex.axis=2)
+  text(x=-0.08, y=1.15, labels='H', cex=4, xpd=NA, font=2)
+  
+  par(p)
+dev.off()
+
+rm(list=ls())
+
+
+
+
+
+# ===================================================== Figure 1 =====================================================
+# figure with the partition of the DNaseI data at SP1 binding sites
+# the partition was obtained by running the clustering with a shift of 41, flip and nooutlier
+# The best partition is the 7th, which is the partition with the 4th lowest SSE (the 4 lowest SSE
+# values are really close to each other, ~1/600 of difference), and it also looks really nice in terms
+# of biology, with aligned footprints and clusters looking different
+
+dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark")
+source(file.path("res", "functions_utility.R"))
+source(file.path("res", "functions_plot.R"))
+
+
+#' Computes the permutation sorting the rows of a matrix by increasing
+#' Pearson correlation with the aggregation (column sums) of the matrix.
+#' A plain vector, i.e. a single region, is trivially ordered.
+#' @param data the matrix of interest (or a vector for a single row).
+#' @return a vector of row indices, least correlated row first; rows with
+#' an undefined correlation (NA, e.g. constant rows) come last.
+#' @author Romain Groux
+get.row.order = function(data)
+{ # a dropped single row cannot be correlated row-wise
+  if(is.vector(data))
+  { return(c(1)) }
+  # aggregation profile over all rows
+  ref    = colSums(data)
+  scores = apply(data, 1, cor, ref)
+  return(order(scores, decreasing=FALSE))
+}
+
+
+
+# clustering parameters
+n.shift      = 41
+flip         = TRUE
+n.cluster    = 3
+
+# the data
+data    = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_dnase_big_clean.txt")))
+# some additional data (tss.plus, tss.minus and peaks are read but not referenced by the plotting code of this figure)
+mnase     = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_mnase_big_clean.txt")))
+motif     = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_sp1motif_big_clean.txt")))
+tss       = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_tss_big_clean.txt")))
+tss.plus  = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_tss_std+_big.txt")))
+tss.minus = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_tss_std-_big.txt")))
+peaks     = read.table(file.path("data", "sp1_dnase", "sp1peak_clean.sga"), header=FALSE, stringsAsFactors=FALSE)
+
+# this is the best partition to me
+results = read.table(file.path("results", "sp1_dnase3", sprintf("sp1peak_dnase_%dclusters_nooutlier_7.txt", n.cluster)), header=TRUE)
+results = format.results(data, results, n.shift, n.cluster)
+
+# cluster 2 aggregation profiles
+chipcor.tss.m   = read.table(file.path("results",
+                                       "sp1_dnase3",
+                                       sprintf("sp1peak_dnase_%dclusters_newkmean_nooutlier_7_cluster2_tss-.txt",
+                                               n.cluster)))
+chipcor.cage.m  = read.table(file.path("results",
+                                       "sp1_dnase3",
+                                       sprintf("sp1peak_dnase_%dclusters_newkmean_nooutlier_7_cluster2_cage-.txt",
+                                               n.cluster)))
+chipcor.dnase  = read.table(file.path("results",
+                                      "sp1_dnase3",
+                                      sprintf("sp1peak_dnase_%dclusters_newkmean_nooutlier_7_cluster2_dnase_rep1.txt",
+                                              n.cluster)))
+chipcor.mnase  = read.table(file.path("results",
+                                      "sp1_dnase3",
+                                      sprintf("sp1peak_dnase_%dclusters_newkmean_nooutlier_7_cluster2_mnase.txt",
+                                              n.cluster)))
+
+
+x.lab     = seq(-300, 300, length.out=5)
+x.at      = seq(0, 1, length.out=length(x.lab))
+x.at2     = seq(1, ncol(data), length.out=length(x.lab))
+# heatmap colors
+color.1 = colorRampPalette(c("white", "red"),  space = "rgb")(100)
+color.2 = colorRampPalette(c("white", "blue"),  space = "rgb")(100)
+# cluster colors
+color.lab = brewer.pal(8, "Set1")
+# whether a region has a motif (row totals capped at 1)
+has_motif = rowSums(motif)
+has_motif[has_motif > 1] = 1
+# whether a region has a TSS (row totals capped at 1)
+has_tss = rowSums(tss)
+has_tss[has_tss > 1] = 1
+
+# plot
+# pdf(file=file.path("results", "article", "figure1.pdf"), width=14, height=8)
+png(filename=file.path(dest.dir, "figure1.png"),
+     width=14, height=8, units="in", res=300)
+  # create the matrices drawn as heatmaps, filled cluster after cluster (rows from..to), and a vector of
+  # color labels for the cluster side bar; the aligned rows of a cluster are sorted with get.row.order()
+  d             = matrix(nrow=nrow(data), ncol=ncol(data))
+  data.aligned  = d
+  motif.aligned = d
+  mnase.aligned = d
+  tss.aligned   = d
+  l             = vector(mode="character", length=nrow(data))
+  from = 1; to = from ;
+  for(j in 1:n.cluster)
+  { index                   = which(results$clusters == j)
+    if(length(index) == 0) { next } # empty cluster: from:to would count backwards and hit rows of another cluster
+    to                      = from + length(index) -1
+    d[from:to,]             = order.rows(data[index,])
+    data.aligned[from:to,]  = realign.data(data[index,],  results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift)
+    motif.aligned[from:to,] = realign.data(motif[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift)
+    mnase.aligned[from:to,] = realign.data(mnase[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift)
+    tss.aligned[from:to,]   = realign.data(tss[index,],   results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift)
+    row.order               = get.row.order(data.aligned[from:to,])
+    data.aligned[from:to,]  = data.aligned[from:to,][row.order,]
+    motif.aligned[from:to,] = motif.aligned[from:to,][row.order,]
+    mnase.aligned[from:to,] = mnase.aligned[from:to,][row.order,]
+    tss.aligned[from:to,]   = tss.aligned[from:to,][row.order,]
+    l[from:to]  = color.lab[j]
+    from = to + 1
+  }
+  
+  p = par(oma=c(0,0,5,0))
+  # layout construction: 5 rows by 8 columns, slots 1-8 on rows 1-3, slots 9-11 on rows 4-5; one width per column
+  labels = c(1, 2, 3, 4, 5, 6, 7, 8,
+             1, 2, 3, 4, 5, 6, 7, 8,
+             1, 2, 3, 4, 5, 6, 7, 8,
+             9, 9,10,10,11,11,11,11,
+             9, 9,10,10,11,11,11,11)
+  lay = layout(matrix(data=labels, nrow=5, ncol=8, byrow=TRUE), widths=rep(c(0.5,5), times=4))
+  # layout.show(lay)
+  
+  # p = par(oma=c(0,0,5,0))
+  # # layout construction
+  # labels = c(1, 2, 3, 4, 5, 6, 7, 8,
+  #            1, 2, 3, 4, 5, 6, 7, 8,
+  #            9, 10,11,11,12,12,12,12,
+  #            9, 10,11,11,12,12,12,12)
+  # lay = layout(matrix(data=labels, nrow=4, ncol=8, byrow=T), widths=c(0.5,5,0.5,5,0.5,5,0.5,5,0.5,5,0.5,5))
+  # layout.show(lay)
+  
+  # panel A: heatmap of the unaligned data; its label-bar slot receives an empty plot (the commented-out variant drew matrix d with the label bar)
+  # p = par(mar=c(5, 0, 4, 1) + 0.1)
+  # plot.label.bar(results$clusters, lwd=2, colors=l)
+  # p = par(mar=c(5, 10, 4, 4) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  # image(t(condense.matrix(d)), col=color.1, xaxt='n', yaxt='n', main="DNaseI data", ylab="", xlab="Position (bp)",
+  #       cex.main=2.5, cex.lab=2.5)
+  # axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  # text(x=-0.07, y=1.1, labels='A', cex=4.5, xpd=NA, font=2)
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot(1,1, xaxt='n', yaxt='n', col="white", xlab="", ylab="", main="", bty='n')
+  p = par(mar=c(5, 10, 4, 4) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(order.rows(data))), col=color.1, xaxt='n', yaxt='n', main="DNaseI data", ylab="", xlab="Position (bp)",
+        cex.main=2.5, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.07, y=1.1, labels='A', cex=4.5, xpd=NA, font=2)
+  # panel B: cluster label bar, then heatmap of the realigned data
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(results$clusters, lwd=2, colors=l)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(data.aligned)), col=color.1, xaxt='n', yaxt='n', main="Aligned DNaseI", ylab="", xlab="Approx. pos. (bp)",
+        cex.main=2.5, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.07, y=1.1, labels='B', cex=4.5, xpd=NA, font=2)
+  # panel C: cluster label bar, then heatmap of the MNase data realigned like the DNaseI data
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(results$clusters, lwd=2, colors=l)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(mnase.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned MNase", ylab="", xlab="Approx. pos. (bp)",
+        cex.main=2.5, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.07, y=1.1, labels='C', cex=4.5, xpd=NA, font=2)
+  # panel D: cluster label bar, then heatmap of the motif occurrences realigned like the DNaseI data
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot.label.bar(results$clusters, lwd=2, colors=l)
+  p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1))
+  image(t(condense.matrix(motif.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned motifs", ylab="", xlab="Approx. pos. (bp)",
+        cex.main=2.5, cex.lab=2.5)
+  axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7)
+  text(x=-0.07, y=1.1, labels='D', cex=4.5, xpd=NA, font=2)
+  
+  # panel E: fraction of the regions of each cluster that have a TSS
+  prop_tss = sapply(1:n.cluster,
+                    function(k)
+                    { members = which(results$clusters == k)
+                      sum(has_tss[members]) / length(members)
+                    })
+  p = par(mar=c(5.1,5.1,4.1,1.1))
+  barplot(height=prop_tss, names.arg=1:n.cluster, col=color.lab,
+          main="Prop. TSS", xlab="clusters", ylab="Prop. region with TSS",
+          cex.main=2.5, cex.lab=2, cex.axis=2)
+  # panel letter, allowed outside of the plot region by xpd=NA
+  text(x=-0.2, y=0.65, labels='E', cex=4.5, xpd=NA, font=2)
+  
+  # panel F: cluster 2 aggregation profiles, each curve divided by its own maximum
+  x        = chipcor.dnase[,1]
+  y.dnase  = chipcor.dnase[,2]
+  y.mnase  = chipcor.mnase[,2]
+  y.tss.m  = chipcor.tss.m[,2]
+  y.cage.m = chipcor.cage.m[,2]
+  p = par(mar=c(5.1,6.1,4.1,2.1))
+  plot(x=x,  y=y.mnase/max(y.mnase), lwd=3, col=color.lab[2], type='l',
+       xlab="Approx. pos. (bp)",
+       ylab="Prop. of max signal",
+       main="Cluster 2",
+       ylim=c(0,1.2),
+       cex.main=2.5, cex.axis=1.7, cex.lab=2.5)
+  lines(x=x, y=y.dnase/max(y.dnase),   lwd=3, col=color.lab[1], lty=1) # dnase on both std / at oriented peaks
+  lines(x=x, y=y.tss.m/max(y.tss.m),   lwd=2, col=color.lab[3], lty=1) # tss   on    - std / at oriented peaks
+  lines(x=x, y=y.cage.m/max(y.cage.m), lwd=2, col=color.lab[4], lty=1) # cage  on    - std / at oriented peaks
+  legend("topright", legend=c("MNase",
+                              "DNaseI",
+                              "TSS -std",
+                              "CAGE -std"),
+         seg.len=0.5, col=c(color.lab[c(2,1,3,4)]), lwd=c(3,3,2,2), bty="n", cex=1)
+  text(x=-300, 1.5, labels='F', cex=4.5, xpd=NA, font=2)
+  
+  # panel G: text-only panel listing the motifs found by MEME, written on an empty 1..100 x 1..100 plot
+  p = par(mar=c(5, 0, 4, 1) + 0.1)
+  plot(0,0, bty="n", xaxt="n", yaxt="n", main="De novo discovered motifs", xlab="", ylab="", cex.main=2.5,
+       xlim=c(1,100), ylim=c(1,100), col="white")
+  # cluster 1 (text column at x=0)
+  text(x=0 , y=100,  labels="Cluster 1",        cex=2, pos=4, font=2, xpd=NA, col=color.lab[1])
+  text(x=0 , y=88,   labels="*NFYA / NFYB",     cex=2, pos=4)
+  text(x=0 , y=76,   labels="*SP related",      cex=2, pos=4)
+  # cluster 2 left (text column at x=35)
+  text(x=35 , y=100, labels="Cluster 2 left",  cex=2, pos=4, font=2, xpd=NA, col=color.lab[2])
+  text(x=35 , y=88,  labels="*SP related",      cex=2, pos=4)
+  text(x=35 , y=76,  labels="*NFYA / NFYB",     cex=2, pos=4)
+  text(x=35 , y=64,  labels=" GATA6 / GATA3",   cex=2, pos=4)
+  text(x=35 , y=52,  labels=" SFPI1 / SPIC",    cex=2, pos=4)
+  text(x=35 , y=40,  labels=" FOX related",     cex=2, pos=4)
+  text(x=35 , y=28,  labels=" ARNTL / BHLHE41", cex=2, pos=4)
+  # cluster 2 right (same column; the last entries lie below ylim, hence xpd=NA)
+  text(x=35 , y=16,  labels="Cluster 2 right",   cex=2, pos=4, font=2, xpd=NA, col=color.lab[2])
+  text(x=35 , y=04,  labels="*SP related",      cex=2, pos=4, xpd=NA)
+  text(x=35 , y=-8,  labels="*NFYA / NFYB",     cex=2, pos=4, xpd=NA)
+  text(x=35 , y=-20, labels=" ARNTL / BHLHE41", cex=2, pos=4, xpd=NA)
+  # cluster 3 (text column at x=75)
+  text(x=75 , y=100,  labels="Cluster 3",       cex=2, pos=4, font=2, xpd=NA, col=color.lab[3])
+  text(x=75 , y=88,  labels="*SP related (c)",  cex=2, pos=4)
+  text(x=75 , y=76,  labels="*NFYA / NFYB",     cex=2, pos=4)
+  text(x=75 , y=64,  labels="*GATA related",    cex=2, pos=4)
+  text(x=75 , y=52,  labels=" ETS related",     cex=2, pos=4)
+  text(x=75 , y=40,  labels=" ATF1",            cex=2, pos=4)
+  text(x=75 , y=28,  labels="*AP1 related",     cex=2, pos=4)
+  text(x=75 , y=16,  labels="*NRF1 (c)",        cex=2, pos=4)
+  text(x=02, 125, labels='G', cex=4.5, xpd=NA, font=2)
+  
+  par(p)
+  
+dev.off()
+
+rm(list=ls())
+
+
diff --git a/scripts/ch_spark/get_figures.sh b/scripts/ch_spark/get_figures.sh
index ada5425..b498aec 100644
--- a/scripts/ch_spark/get_figures.sh
+++ b/scripts/ch_spark/get_figures.sh
@@ -1,15 +1,17 @@
 dest_dir='/local/groux/phd_thesis/images/ch_spark'
 script_dir='/local/groux/phd_thesis/scripts/ch_spark'
 targ_dir='/local/groux/Kmeans_chipseq/results/article'
 
 mkdir -p $dest_dir
 
-cp $targ_dir/figure1.pdf $dest_dir
-cp $targ_dir/supplemental_figure1.pdf $dest_dir
-cp $targ_dir/supplemental_figure2.pdf $dest_dir
-cp $targ_dir/supplemental_figure4.pdf $dest_dir
-cp $targ_dir/supplemental_figure5.pdf $dest_dir
-cp $targ_dir/supplemental_figure8.pdf $dest_dir
+# cp $targ_dir/figure1.pdf $dest_dir
+# cp $targ_dir/supplemental_figure1.pdf $dest_dir
+# cp $targ_dir/supplemental_figure2.pdf $dest_dir
+# cp $targ_dir/supplemental_figure4.pdf $dest_dir
+# cp $targ_dir/supplemental_figure5.pdf $dest_dir
+# cp $targ_dir/supplemental_figure8.pdf $dest_dir
 
+# redo the article figures in png format, with lower resolution
+Rscript $script_dir/figures.R