diff --git a/main/ch_atac-seq.aux b/main/ch_atac-seq.aux
index 5ca24ea..5ab3784 100644
--- a/main/ch_atac-seq.aux
+++ b/main/ch_atac-seq.aux
@@ -1,174 +1,168 @@
 \relax 
 \providecommand\hyper@newdestlabel[2]{}
 \citation{vierstra_genomic_2016}
 \citation{neph_expansive_2012}
 \citation{adey_rapid_2010,buenrostro_transposition_2013}
 \citation{barski_high-resolution_2007}
 \citation{vierstra_genomic_2016}
 \citation{vierstra_genomic_2016}
 \citation{adey_rapid_2010,buenrostro_transposition_2013}
 \citation{adey_rapid_2010}
-\citation{adey_rapid_2010}
 \@writefile{toc}{\contentsline {chapter}{\numberline {4}Chromatin accessibility of monocytes}{57}{chapter.4}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
 \@writefile{loa}{\addvspace {10\p@ }}
 \newlabel{atac_seq}{{4}{57}{Chromatin accessibility of monocytes}{chapter.4}{}}
 \@writefile{chapter}{\contentsline {toc}{Chromatin accessibility of monocytes}{57}{chapter.4}}
 \@writefile{toc}{\contentsline {section}{\numberline {4.1}ATAC-seq}{57}{section.4.1}}
 \@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces \textbf  {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows to add sequencing adaptors. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legent taken and adapted from \citep  {vierstra_genomic_2016}.\relax }}{58}{figure.caption.31}}
 \newlabel{atac_seq_atac_seq}{{4.1}{58}{\textbf {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows to add sequencing adaptors. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legent taken and adapted from \citep {vierstra_genomic_2016}.\relax }{figure.caption.31}{}}
+\citation{adey_rapid_2010}
 \citation{neph_expansive_2012}
 \citation{berest_quantification_2018}
 \citation{grossman_positional_2018}
 \@writefile{toc}{\contentsline {section}{\numberline {4.2}Monitoring TF binding}{59}{section.4.2}}
 \citation{angerer_single_2017}
-\citation{fan_characterizing_2016,kiselev_sc3:_2017}
-\citation{aibar_scenic:_2017}
-\citation{gonzalez-blas_cistopic:_2019}
-\citation{buenrostro_transposition_2013}
 \@writefile{toc}{\contentsline {section}{\numberline {4.3}The advent of single cell DGF}{60}{section.4.3}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.4}A quick overview of scATAC-seq data analysis}{60}{section.4.4}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.5}Open questions}{60}{section.4.5}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces \textbf  {framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used a if it was a bulk sequencing experiment. Regions of interest are listed using peak calling on the the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.\relax }}{61}{figure.caption.32}}
-\newlabel{atac_seq_pipeline}{{4.2}{61}{\textbf {framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used a if it was a bulk sequencing experiment. Regions of interest are listed using peak calling on the the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.\relax }{figure.caption.32}{}}
-\citation{hepler_10x_2018}
+\@writefile{toc}{\contentsline {section}{\numberline {4.4}Open issues}{60}{section.4.4}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.5}Data}{60}{section.4.5}}
 \citation{hon_chromasig:_2008}
 \citation{nielsen_catchprofiles:_2012}
 \citation{kundaje_ubiquitous_2012}
 \citation{nair_probabilistic_2014}
 \citation{groux_spar-k:_2019}
-\@writefile{toc}{\contentsline {section}{\numberline {4.6}Data}{62}{section.4.6}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.7}Identification of catalog of chromatin architectures}{62}{section.4.7}}
 \citation{nair_probabilistic_2014}
 \citation{nair_probabilistic_2014}
 \citation{nair_probabilistic_2014}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{63}{subsection.4.7.1}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces \textbf  {Illustration of the expectation-maximization algorithms} \textbf  {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep  {nair_probabilistic_2014}. \textbf  {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.  EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }}{63}{figure.caption.33}}
-\newlabel{atac_seq_em}{{4.3}{63}{\textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.\\ EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }{figure.caption.33}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.6}Identifying over-represented signals}{61}{section.4.6}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.6.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{61}{subsection.4.6.1}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.6.2}EMSequence : an algorithm to identify over-represented sequences}{61}{subsection.4.6.2}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces \textbf  {Illustration of the expectation-maximization algorithms} \textbf  {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep  {nair_probabilistic_2014}. \textbf  {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.  EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }}{62}{figure.caption.32}}
+\newlabel{atac_seq_em}{{4.2}{62}{\textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.\\ EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }{figure.caption.32}{}}
 \citation{nair_probabilistic_2014}
 \citation{nair_probabilistic_2014}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.2}EMSequence : an algorithm to identify over-represented sequences}{64}{subsection.4.7.2}}
-\@writefile{toc}{\contentsline {subsubsection}{without shift and flip}{64}{subsection.4.7.2}}
-\newlabel{atac_seq_emseq_likelihood}{{4.1}{64}{without shift and flip}{equation.4.7.1}{}}
 \citation{nair_probabilistic_2014}
+\@writefile{toc}{\contentsline {subsubsection}{without shift and flip}{63}{figure.caption.32}}
+\newlabel{atac_seq_emseq_likelihood}{{4.1}{63}{without shift and flip}{equation.4.6.1}{}}
+\newlabel{atac_seq_emseq_update_model}{{4.2}{63}{without shift and flip}{equation.4.6.2}{}}
+\@writefile{toc}{\contentsline {subsubsection}{with shift and flip}{63}{equation.4.6.2}}
 \citation{nair_probabilistic_2014}
 \citation{nair_probabilistic_2014}
-\newlabel{atac_seq_emseq_update_model}{{4.2}{65}{without shift and flip}{equation.4.7.2}{}}
-\@writefile{toc}{\contentsline {subsubsection}{with shift and flip}{65}{equation.4.7.2}}
-\newlabel{atac_seq_emseq_likelihood_shift_flip}{{4.3}{65}{with shift and flip}{equation.4.7.3}{}}
-\newlabel{atac_seq_emseq_reverse_motif}{{4.4}{65}{with shift and flip}{equation.4.7.4}{}}
-\newlabel{atac_seq_emseq_update_model_shift_flip}{{4.5}{66}{with shift and flip}{equation.4.7.5}{}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{66}{subsection.4.7.3}}
+\newlabel{atac_seq_emseq_likelihood_shift_flip}{{4.3}{64}{with shift and flip}{equation.4.6.3}{}}
+\newlabel{atac_seq_emseq_reverse_motif}{{4.4}{64}{with shift and flip}{equation.4.6.4}{}}
+\newlabel{atac_seq_emseq_update_model_shift_flip}{{4.5}{64}{with shift and flip}{equation.4.6.5}{}}
 \citation{nair_probabilistic_2014}
 \citation{nair_probabilistic_2014}
-\newlabel{atac_seq_emjoint_likelihood}{{4.6}{67}{EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{equation.4.7.6}{}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.4}Data realignment}{67}{subsection.4.7.4}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.6.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{65}{subsection.4.6.3}}
+\newlabel{atac_seq_emjoint_likelihood}{{4.6}{65}{EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{equation.4.6.6}{}}
 \citation{voss_dynamic_2014}
 \citation{cirillo_opening_2002,zaret_pioneer_2011,soufi_pioneer_2015}
 \citation{buenrostro_transposition_2013}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces \textbf  {Fragment size analysis} \textbf  {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf  {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf  {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }}{68}{figure.caption.34}}
-\newlabel{atac_seq_fragment_size}{{4.4}{68}{\textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }{figure.caption.34}{}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.8}Results}{68}{section.4.8}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.8.1}Fragment size analysis}{68}{subsection.4.8.1}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces \textbf  {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf  {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf  {Middle row :} each position of the reads were used. \textbf  {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.  The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }}{69}{figure.caption.35}}
-\newlabel{atac_seq_ctcf_all_data}{{4.5}{69}{\textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }{figure.caption.35}{}}
 \citation{buenrostro_transposition_2013}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.6.4}Data realignment}{66}{subsection.4.6.4}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.7}Results}{66}{section.4.7}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.1}Fragment size analysis}{66}{subsection.4.7.1}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces \textbf  {Fragment size analysis} \textbf  {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf  {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf  {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }}{67}{figure.caption.33}}
+\newlabel{atac_seq_fragment_size}{{4.3}{67}{\textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }{figure.caption.33}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces \textbf  {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf  {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf  {Middle row :} each position of the reads were used. \textbf  {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.  The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }}{68}{figure.caption.34}}
+\newlabel{atac_seq_ctcf_all_data}{{4.4}{68}{\textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }{figure.caption.34}{}}
 \citation{adey_rapid_2010}
 \citation{buenrostro_transposition_2013,li_identification_2019}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces \textbf  {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }}{70}{figure.caption.36}}
-\newlabel{atac_seq_ctcf_sp1_myc_ebf1_footprint}{{4.6}{70}{\textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }{figure.caption.36}{}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.8.2}Measuring open chromatin and nucleosome occupancy}{70}{subsection.4.8.2}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces \textbf  {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }}{69}{figure.caption.35}}
+\newlabel{atac_seq_ctcf_sp1_myc_ebf1_footprint}{{4.5}{69}{\textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }{figure.caption.35}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.2}Measuring open chromatin and nucleosome occupancy}{69}{subsection.4.7.2}}
 \citation{neph_expansive_2012}
 \citation{fu_insulator_2008}
 \citation{neph_expansive_2012}
 \citation{kundaje_ubiquitous_2012}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.8.3}Evaluation of EMSequence and ChIPPartitioning}{72}{subsection.4.8.3}}
-\@writefile{toc}{\contentsline {subsubsection}{EMSequence}{72}{subsection.4.8.3}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.3}Evaluation of EMSequence and ChIPPartitioning}{71}{subsection.4.7.3}}
+\@writefile{toc}{\contentsline {subsubsection}{EMSequence}{71}{subsection.4.7.3}}
 \citation{kent_blatblast-like_2002}
 \citation{chatr-aryamontri_biogrid_2017}
 \citation{castro-mondragon_rsat_2017}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces \textbf  {Classification performances on simulated data :} \textbf  {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf  {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }}{73}{figure.caption.37}}
-\newlabel{atac_seq_emseq_auc_roc}{{4.7}{73}{\textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }{figure.caption.37}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces \textbf  {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }}{74}{figure.caption.38}}
-\newlabel{atac_seq_emseq_sp1_10class}{{4.8}{74}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }{figure.caption.38}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces \textbf  {Classification performances on simulated data :} \textbf  {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf  {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }}{72}{figure.caption.36}}
+\newlabel{atac_seq_emseq_auc_roc}{{4.6}{72}{\textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }{figure.caption.36}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces \textbf  {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }}{73}{figure.caption.37}}
+\newlabel{atac_seq_emseq_sp1_10class}{{4.7}{73}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }{figure.caption.37}{}}
 \citation{nair_probabilistic_2014}
-\@writefile{toc}{\contentsline {subsubsection}{ChIPPartitioning}{75}{figure.caption.38}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces \textbf  {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{76}{figure.caption.39}}
-\newlabel{atac_seq_emread_ctcf_noshift_flip}{{4.9}{76}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.39}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.10}{\ignorespaces \textbf  {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{76}{figure.caption.40}}
-\newlabel{atac_seq_emread_ctcf_shift_flip}{{4.10}{76}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.40}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces \textbf  {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{74}{figure.caption.38}}
+\newlabel{atac_seq_emread_ctcf_noshift_flip}{{4.8}{74}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.38}{}}
+\@writefile{toc}{\contentsline {subsubsection}{ChIPPartitioning}{74}{figure.caption.37}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces \textbf  {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{75}{figure.caption.39}}
+\newlabel{atac_seq_emread_ctcf_shift_flip}{{4.9}{75}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.39}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.8}Aligning the binding sites}{76}{section.4.8}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.10}{\ignorespaces \textbf  {Central parts of the extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{77}{figure.caption.40}}
+\newlabel{atac_seq_23class}{{4.10}{77}{\textbf {Central parts of the extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.40}{}}
 \citation{kurotaki_transcriptional_2017,rico_comparative_2017}
-\@writefile{toc}{\contentsline {section}{\numberline {4.9}Aligning the binding sites}{77}{section.4.9}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.11}{\ignorespaces \textbf  {Central parts of the extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{78}{figure.caption.41}}
-\newlabel{atac_seq_23class}{{4.11}{78}{\textbf {Central parts of the extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.41}{}}
 \citation{castro-mondragon_rsat_2017}
-\@writefile{toc}{\contentsline {section}{\numberline {4.10}Exploring individual TF classes}{79}{section.4.10}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.12}{\ignorespaces \textbf  {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{80}{figure.caption.42}}
-\newlabel{atac_seq_ctcf_subclass}{{4.12}{80}{\textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.42}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.11}{\ignorespaces \textbf  {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{79}{figure.caption.41}}
+\newlabel{atac_seq_ctcf_subclass}{{4.11}{79}{\textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.41}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.9}Exploring individual TF classes}{79}{section.4.9}}
 \citation{marsland_machine_2015-1}
 \citation{fan_characterizing_2016,kiselev_sc3:_2017}
 \citation{aibar_scenic:_2017}
 \citation{gonzalez-blas_cistopic:_2019}
-\@writefile{toc}{\contentsline {section}{\numberline {4.11}Discussions}{81}{section.4.11}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.12}Perspectives}{81}{section.4.12}}
-\@writefile{toc}{\contentsline {section}{\numberline {4.13}Methods}{82}{section.4.13}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.1}Implementations}{82}{subsection.4.13.1}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.2}Fragment classes}{82}{subsection.4.13.2}}
-\newlabel{atac_seq_fragment_length_class}{{4.7}{82}{Fragment classes}{equation.4.13.7}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.10}Discussions}{80}{section.4.10}}
+\@writefile{toc}{\contentsline {section}{\numberline {4.11}Perspectives}{80}{section.4.11}}
+\citation{castro-mondragon_rsat_2017}
+\@writefile{toc}{\contentsline {section}{\numberline {4.12}Methods}{81}{section.4.12}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.1}Implementations}{81}{subsection.4.12.1}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.2}Fragment classes}{81}{subsection.4.12.2}}
+\newlabel{atac_seq_fragment_length_class}{{4.7}{81}{Fragment classes}{equation.4.12.7}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.3}Simulated sequences}{81}{subsection.4.12.3}}
 \citation{dalton_clustering_2009}
 \citation{nair_probabilistic_2014}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.3}Simulated sequences}{83}{subsection.4.13.3}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.4}Realignment using JASPAR motifs}{83}{subsection.4.13.4}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.5}Display of motif logo}{83}{subsection.4.13.5}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.6}Model extension}{83}{subsection.4.13.6}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.7}Extracting data assigned to a class}{83}{subsection.4.13.7}}
-\newlabel{encode_peaks_algo_ndr_extend}{{3}{84}{Extracting data assigned to a class}{algocfline.3}{}}
-\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Computes a matrix containing the data assigned to a given class $S$.\relax }}{84}{algocf.3}}
-\newlabel{atac_seq_algo_extract_class}{{3}{84}{Extracting data assigned to a class}{algocf.3}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.4}Realignment using JASPAR motifs}{82}{subsection.4.12.4}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.5}Model extension}{82}{subsection.4.12.5}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.6}Extracting data assigned to a class}{82}{subsection.4.12.6}}
+\@writefile{lot}{\contentsline {table}{\numberline {4.1}{\ignorespaces  \textbf  {TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url  {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TF which models are children of the given node. "Name" refers to the label this model is referred to in the text and figures.\relax }}{83}{table.caption.42}}
+\newlabel{atac_seq_motif_table}{{4.1}{83}{\textbf {TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TF which models are children of the given node. "Name" refers to the label this model is referred to in the text and figures.\relax }{table.caption.42}{}}
+\newlabel{encode_peaks_algo_ndr_extend}{{3}{85}{Extracting data assigned to a class}{algocfline.3}{}}
+\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Computes a matrix containing the data assigned to a given class $S$.\relax }}{85}{algocf.3}}
+\newlabel{atac_seq_algo_extract_class}{{3}{85}{Extracting data assigned to a class}{algocf.3}{}}
 \@setckpt{main/ch_atac-seq}{
-\setcounter{page}{85}
+\setcounter{page}{86}
 \setcounter{equation}{7}
 \setcounter{enumi}{13}
 \setcounter{enumii}{0}
 \setcounter{enumiii}{0}
 \setcounter{enumiv}{0}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{0}
 \setcounter{chapter}{4}
-\setcounter{section}{13}
-\setcounter{subsection}{7}
+\setcounter{section}{12}
+\setcounter{subsection}{6}
 \setcounter{subsubsection}{0}
 \setcounter{paragraph}{0}
 \setcounter{subparagraph}{0}
-\setcounter{figure}{12}
-\setcounter{table}{0}
+\setcounter{figure}{11}
+\setcounter{table}{1}
 \setcounter{NAT@ctr}{0}
 \setcounter{FBcaption@count}{0}
 \setcounter{ContinuedFloat}{0}
 \setcounter{KVtest}{0}
 \setcounter{subfigure}{0}
 \setcounter{subfigure@save}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
 \setcounter{subtable@save}{0}
 \setcounter{lotdepth}{1}
 \setcounter{lips@count}{2}
 \setcounter{lstnumber}{1}
 \setcounter{Item}{13}
 \setcounter{Hfootnote}{0}
 \setcounter{bookmark@seq@number}{0}
 \setcounter{AM@survey}{0}
 \setcounter{ttlp@side}{0}
 \setcounter{myparts}{0}
 \setcounter{parentequation}{0}
 \setcounter{AlgoLine}{39}
 \setcounter{algocfline}{3}
 \setcounter{algocfproc}{3}
 \setcounter{algocf}{3}
 \setcounter{float@type}{8}
 \setcounter{nlinenum}{0}
 \setcounter{lstlisting}{0}
 \setcounter{section@level}{0}
 }
diff --git a/main/ch_atac-seq.tex b/main/ch_atac-seq.tex
index d38fe50..a980632 100644
--- a/main/ch_atac-seq.tex
+++ b/main/ch_atac-seq.tex
@@ -1,543 +1,589 @@
 \cleardoublepage
 \chapter{Chromatin accessibility of monocytes}
 \label{atac_seq}
 \markboth{Chromatin accessibility of monocytes}{Chromatin accessibility of monocytes}
 \addcontentsline{chapter}{toc}{Chromatin accessibility of monocytes}
 
+This chapter contains ongoing work. I present the foundations of a computational framework to analyse chromatin organization around TF binding sites from ATAC-seq data. The results presented here are still preliminary. However, in the best case, they may form a basis for other projects. % Because reporting these results, even if incomplete, is at least useless and at most useful to the scientific community, there is no reason not to present them.
+
 Digital genomic footprinting (DGF) methods are a powerful mean to reveal protein occupancy, genome-wide, at once \citep{vierstra_genomic_2016}. These methods allow to identify open chromatin regions within a genome and thus to list its active regulatory sites. These technologies are based on a targeted degradation of the open regions of the genome, either by DNaseI \citep{neph_expansive_2012} or by a transposon-based system \citep{adey_rapid_2010,buenrostro_transposition_2013}.
 
 DGF techologies encounter a yet ever-growing popularity because of the wealth of data produced in a single experiment. Indeed, instead of running thousands - one per transcription factor (TF) - of chromatin immonuprecitpitation followed by sequencing (ChIP-seq) \citep{barski_high-resolution_2007} to know where each TF is binding, it is sufficient to run a single chromatin accessibility assay. The price to pay for this gain of simplicity, compared to ChIP-seq, is a loss of information. Indeed, chromatin accessibility assays allow to list any active regulatory region within a sample, at once but do not give any information about which TF or complex is bound at a given location
 
 The transposon-based method - named assay for transposase-accessible chromatin with high-throughput sequencing (ATAC-seq) - is currently gaining a rapid popularity due to its technical affordability and cheap labor costs compared to DNaseI-based methods.
 
 
 \section{ATAC-seq}
 
 \begin{figure}[!htbp]
 \begin{center}
 	\includegraphics[scale=0.7]{images/ch_atac-seq/ATAC-seq2.png}  
 	\captionof{figure}{\textbf{ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows to add sequencing adaptors. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legent taken and adapted from \citep{vierstra_genomic_2016}.}
 \label{atac_seq_atac_seq}
 \end{center}
 \end{figure}
 
 ATAC-seq assays uses a modified Tn5 transposase enzyme to selectively fragment the accessible regions of the genome \citep{adey_rapid_2010,buenrostro_transposition_2013}. The enzyme inserts small double stranded barcodes inside the DNA wherever it is accessible resulting a the creation of double strand breaks (shown in Figure \ref{atac_seq_atac_seq}). This process, known as tagmentation, allows to i) fragment the genome and ii) inserts sequencing barcodes at once. It should be noted that the Tn5 acts as an homodimer and thus inserts two copy of the same adaptors separated from each other by 9bp \citep{adey_rapid_2010}.
 
 For a given genomic locus, the number of insertions depends on several parameters. First, the Tn5 transposition rate itself depends on  the enzyme and substrate concentrations. Second, it has been demonstrated that the Tn5 posses a slight sequence preference \citep{adey_rapid_2010}. Finally, the most important parameter is steric hindrance. Stretches of DNA occupied by other proteins, such a TF binding sites are protected from transposition. This usually leads to the creation of a characteristic signal at the level of the binding site. 
 
 The experimental readout of ATAC-seq is produced by sequencing the DNA fragment edges – the transposition sites. Mapping these sequences against the genome allows to retrieve the insertion sites. If we represent the density of transposition events along a given region of the genome, the density usually show a decrease at the level of the binding site. This decrease is named “footprint” (see Figure \ref{atac_seq_atac_seq}). Finally, if pair-ended sequencing is performed, it is possible to know the size of each fragment which we will see later, is of biological importance.
 
 
 \section{Monitoring TF binding}
 
 As discusses above, DGF assays are able to highlight active regulatory elements from an entire genome, at once. However, this comes with the price of an information loss. First, even if we can identify active loci likely to be bound by TFs, we have no direct idea about the identities of the TFs bound. Second, we have no idea about the function of those regions. These regions may act as transcriptional activator or repressor. This activity is ultimately bared by the TF and other complexes bound. Thus delineating a region function necessitate to identify the TFs bound here.
 
 This task, even if difficult, can be undertaken by implementing dedicated strategies. First, it is possible to collect evidences about the identity of TF likely to bind at a given location through a motif analysis. TFs can bind DNA directly through their own DNA binding domain or indirectly, through an interaction with at least on other partner TF which binds DNA directly \citep{neph_expansive_2012}. For a given TF, direct binding events can be detected by monitoring the presence of a binding motif if a specificity model is available. Thus a footprint baring a motif is likely to reflect a direct binding event. However, this method has two important limitations : related TF often share a common DNA specificity. For indirect binding, however, nothing can be done to detect such events. Also, evidences about the presence of biggest complexes can be collected by studying the size of the footprint. Large complexes should leave large footprints. This approach, even if limited is able to pinpoint a handful of candidate TFs. 
 
 Second, deciphering the functions of the regulatory elements can be undertaken by looking at the footprint produced by a given factor. Indeed, previous studies have showed that activator and repressor TFs tend to produce different types of footprints \citep{berest_quantification_2018}. Also, the spatial positioning of TF motif within the footprint seemed to be linked with the factor functions \citep{grossman_positional_2018}. For instance, factors associated with the regulation of transcription tend to have a motif in the middle of the footprint whereas factors known to interact with chromatin remodeling factors tend to have a footprint at the edge of the footprint, in contact with the surrounding nucleosomes.
 
 
 \section{The advent of single cell DGF}
 
 Recently, the advent of single-cell (sc) sequencing technologies have been a real game changer in the field of life science. These technological advances allowed to measure gene expression and chromatin accessibility (scATAC-seq) at a yet unprecedented resolution. As bulk sequencing was providing an average overview of what was going on, single-cell sequencing allows to monitor what is happening in each cell of a population. This advance had a profound impact on genomics for two reasons.
 
 First, for the really first time, the heterogeneity of a cell population became accessible and could be studied at the chromatin, transcriptional and protein levels.
 Second, the possibility of collecting high dimensionality data from tenth of thousands of individuals allows genomics to fully enter in the modern big data era, making commonly used machine learning methods usable as the number of parameters to estimate in the models became smaller than the number of individuals in the data \citep{angerer_single_2017}.
 
 
-\section{A quick overview of scATAC-seq data analysis}
+% \section{A quick overview of scATAC-seq data analysis}
 
-So far, most of the single cell technologies are targeted at measuring gene expression through scRNA-seq. Naturally, dedicated algorithms and computational methods have been developed to analyze these data. Currently, the most common types of analyses made are i) data projections and dimensionality reduction such as principal component analysis (PCA), t-stochastic distributed neighbours embedding (t-SNE) or uniform manifold approximation and projection (UMAP) and ii) cell population detection by clustering the cells based on the expression of genes \citep{fan_characterizing_2016, kiselev_sc3:_2017}, by reconstructing gene regulation network \citep{aibar_scenic:_2017} or by identifying cellular states based on the accessible region motif content \citep{gonzalez-blas_cistopic:_2019}. In all cases, the use of scATAC-seq data is to determined whether a region is accessible or not. The downstream analyses characterizes the accessible region using i) the number of reads mapping in these regions as a measure of the accessibility or ii) the sequence content within these accessible regions to determine regulatory topics.
+% So far, most of the single cell technologies are targeted at measuring gene expression through scRNA-seq. Naturally, dedicated algorithms and computational methods have been developed to analyze these data. Currently, the most common types of analyses made are i) data projections and dimensionality reduction such as principal component analysis (PCA), t-stochastic distributed neighbours embedding (t-SNE) or uniform manifold approximation and projection (UMAP) and ii) cell population detection by clustering the cells based on the expression of genes \citep{fan_characterizing_2016, kiselev_sc3:_2017}, by reconstructing gene regulation network \citep{aibar_scenic:_2017} or by identifying cellular states based on the accessible region motif content \citep{gonzalez-blas_cistopic:_2019}. In all cases, the use of scATAC-seq data is to determined whether a region is accessible or not. The downstream analyses characterizes the accessible region using i) the number of reads mapping in these regions as a measure of the accessibility or ii) the sequence content within these accessible regions to determine regulatory topics.
 
 
-\section{Open questions}
+% \section{Open questions}
 
-\begin{figure}[!htbp]
-\begin{center}
-	\includegraphics[scale=0.5]{images/ch_atac-seq/pipeline.png}  
-	\captionof{figure}{\textbf{framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used a if it was a bulk sequencing experiment. Regions of interest are listed using peak calling on the the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.}
-\label{atac_seq_pipeline}
-\end{center}
-\end{figure}
+% \begin{figure}[!htbp]
+% \begin{center}
+% 	\includegraphics[scale=0.5]{images/ch_atac-seq/pipeline.png}  
+% 	\captionof{figure}{\textbf{framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used a if it was a bulk sequencing experiment. Regions of interest are listed using peak calling on the the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.}
+% \label{atac_seq_pipeline}
+% \end{center}
+% \end{figure}
+
+% All these methods have shown good performances to identify know and new cell populations [REFERENCES]. However, some issues remains open. First, none of these methods uses DGF data to identify different types of footprints or chromatin architecture, in terms of signal shape, at the single cell level.  Second, ATAC-seq measures chromatin accessibility but also provides information about the nucleosome occupancy at accessible genomic regions \citep{buenrostro_transposition_2013}. Thus counting the number of reads mapping at a given loci is, indeed, an indication of accessibility but it does use only a small fraction of the available information. Finally, to date, no study has tried to determine whether what is observed at the bulk level can also be seen at the individual cell level and whether this can be used to infer the molecular state of the cells.
+
+% In this project, I designed and developed the basements of a computational framework to construct a catalog of prototypical chromatin architectures from single-cell data that can later on be used to annotate individual regions, in single cell. Such a method can be useful to determine cellular molecular state and to group cells accordingly. The entire pipeline is illustrate in Figure \ref{atac_seq_pipeline}.
 
-All these methods have shown good performances to identify know and new cell populations [REFERENCES]. However, some issues remains open. First, none of these methods uses DGF data to identify different types of footprints or chromatin architecture, in terms of signal shape, at the single cell level.  Second, ATAC-seq measures chromatin accessibility but also provides information about the nucleosome occupancy at accessible genomic regions \citep{buenrostro_transposition_2013}. Thus counting the number of reads mapping at a given loci is, indeed, an indication of accessibility but it does use only a small fraction of the available information. Finally, to date, no study has tried to determine whether what is observed at the bulk level can also be seen at the individual cell level and whether this can be used to infer the molecular state of the cells.
+\section{Open issues}
 
-In this project, I designed and developed the basements of a computational framework to construct a catalog of prototypical chromatin architectures from single-cell data that can later on be used to annotate individual regions, in single cell. Such a method can be useful to determine cellular molecular state and to group cells accordingly. The entire pipeline is illustrate in Figure \ref{atac_seq_pipeline}.
+I identified two interesting questions with regard to ATAC-seq data. First, in the previous chapters, I studied how chromatin is organized in the vicinity of TF binding sites using a pretty standard combination of ChIP-seq, DNase-seq and MNase-seq data. However, I wanted to assess to what extent the same could be done with fewer and cheaper-to-produce data. Second, I wondered to what extent single-cell data could be pooled together and used as a bulk sequencing experiment.
 
 \section{Data}
 
 To this end, I choose to work with a publicly available single-cell ATAC-seq dataset from 5'000 human blood monocytes from a healthy donor. These data have been produced by 10xGenomics (\url{https://www.10xgenomics.com}).
 
-10xGenomics is one of the most promising and fast growing company specialized in sequencing technologies in the San Francisco Bay area \citep{hepler_10x_2018}. The core activity is to sell sequencing technologies and data analysis softwares to public and private entities. To advertise their products, 10xGenomics offer a free access to several high quality single cell datasets.
+% 10xGenomics is one of the most promising and fast growing company specialized in sequencing technologies in the San Francisco Bay area \citep{hepler_10x_2018}. The core activity is to sell sequencing technologies and data analysis softwares to public and private entities. To advertise their products, 10xGenomics offer a free access to several high quality single cell datasets.
 
-To demonstrate the capabilities of their sequencing and bioinformatics analysis technologies, pre-processing such as mapping, cell demultiplexing, sequencing adapters trimming, quality control checks have already been performed. Thus working with these data require minimum handling. Additionally, some downstream analyses such as peak calling or clustering have already been performed. For these reasons, this dataset offers all the conditions to be used as a standard to develop and benchmark new analyses methods.
+10xGenomics is a company active in the field of sequencing technologies and data analysis software. To demonstrate the capabilities of their sequencing and bioinformatics analysis technologies, 10xGenomics offers free access to several high quality single cell datasets together with their analysis results. Pre-processing steps such as mapping, cell demultiplexing, sequencing adapter trimming and quality control checks have already been performed. Thus working with these data requires minimal handling. Additionally, some downstream analyses such as peak calling or clustering have already been performed. For these reasons, their datasets offer all the conditions to be used as a standard to develop and benchmark new analysis methods.
 
 Hg19 mapped reads were downloaded in bam format from \url{http://s3-us-west-2.amazonaws.com/10x.files/samples/cell-atac/1.1.0/atac_v1_pbmc_5k/atac_v1_pbmc_5k_possorted_bam.bam} and the corresponding peaks called on the aggregated data were downloaded in bed format from \url{http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_5k/atac_v1_pbmc_5k_peaks.bed}.
 
 
-\section{Identification of catalog of chromatin architectures}
+\section{Identifying over-represented signals}
 
 The study of signal shape (distribution) has been a quite active field for bulk sequencing experiments during the last decade. Dedicated algorithms \citep{hon_chromasig:_2008} \citep{nielsen_catchprofiles:_2012} \citep{kundaje_ubiquitous_2012} \citep{nair_probabilistic_2014} \citep{groux_spar-k:_2019} have been developed to cluster genomic regions based on their distribution of reads, which reflects their function. The major issue faced where i) to assess whether two regions had the same signal, they had to be properly aligned, ii) even if the regions were properly aligned, they had to be properly oriented and iii) the signal may be sparse due to an different sequencing depth.
 
 
 \subsection{ChIPPartitioning : an algorithm to identify over-represented read patterns}
 
+ChIPPartitioning is an algorithm that has been developed by \cite{nair_probabilistic_2014} to classify regions based on their sequencing profiles and to identify archetypical sequencing densities (or models). Because the algorithm is already presented in detail in section \ref{encode_peaks_chippartitioning}, it will not be discussed further here. Nonetheless, the reader is invited to read the above-mentioned section in order to properly understand the points discussed below.
+
+% Most of the above mentioned  algorithms and softwares deal with some of these issues. However, ChIPPartitioning \citep{nair_probabilistic_2014} (see section \ref{encode_peaks_chippartitioning}) is really interesting. It is a probabilistic partitioning method that softly clusters a sets of genomic regions represented as a vector of counts corresponding to the number of reads (ChIP-seq, DNase-seq) along them. The regions clustered based on their signal shape resemblance. To ensure proper comparisons between the regions, the algorithm allows to offset one region compare to the other to retrieve a similar signal at different offsets and to flip the signal orientation. Finally, it has been demonstrated to be really robust to sparse data. 
+
+% This algorithm models the signal over a region of length $L$ has having being sampled from a mixture of $K$ signal models, using $L$ independent Poisson distributions. The number of reads sequenced over this region is then the result of this sampling process. The entire set of regions is assumed to have been generated from a mixture of $K$ different signal models (classes). Each class is represented by a vector of $L' \le L$ values that represent the expected number of reads at each position for that class. These values are thus the Poisson distribution parameters. 
+
+% In order to discover the $K$ different chromatin signatures in the data, the algorithm proceed to a maximum likelihood estimation of the Poisson distribution parameters using an expectation-maximization (EM) framework. Given a set of $K$ models, the likelihoods of each region given each class is computed. A posterior probability of each class given each region can, in turn, be computed. These probabilities can be interpreted as a soft clustering. The parameters of the classes are updated using a weighted aggregation of the signal. Since each region is computed a probability to belong to each class, it participates to the update of all the classes, with different weights.
+
+% If the length of the chromatin signature searched $L'<L$, then the algorithm slides a window along the regions and searched for this signature at each possible offset. This is how it deals with alignment issue. The signal orientation issue is tackled by also performing a searched with the flipped model. The procedure is depicted in Figure \ref{atac_seq_em}A.
+
+\subsection{EMSequence : an algorithm to identify over-represented sequences}
+
 \begin{figure}[!htbp]
 \begin{center}
 	\includegraphics[scale=0.10]{images/ch_atac-seq/em.png}  
 	\captionof{figure}{\textbf{Illustration of the expectation-maximization algorithms} \textbf{A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep{nair_probabilistic_2014}. \textbf{B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data as having been sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.\\
 EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.}
 \label{atac_seq_em}
 \end{center}
 \end{figure}
 
-
-Most of the above mentioned  algorithms and softwares deal with some of these issues. However, ChIPPartitioning \citep{nair_probabilistic_2014} (see section \ref{encode_peaks_chippartitioning}) is really interesting. It is a probabilistic partitioning method that softly clusters a sets of genomic regions represented as a vector of counts corresponding to the number of reads (ChIP-seq, DNase-seq) along them. The regions clustered based on their signal shape resemblance. To ensure proper comparisons between the regions, the algorithm allows to offset one region compare to the other to retrieve a similar signal at different offsets and to flip the signal orientation. Finally, it has been demonstrated to be really robust to sparse data. 
-
-This algorithm models the signal over a region of length $L$ has having being sampled from a mixture of $K$ signal models, using $L$ independent Poisson distributions. The number of reads sequenced over this region is then the result of this sampling process. The entire set of regions is assumed to have been generated from a mixture of $K$ different signal models (classes). Each class is represented by a vector of $L' \le L$ values that represent the expected number of reads at each position for that class. These values are thus the Poisson distribution parameters. 
-
-In order to discover the $K$ different chromatin signatures in the data, the algorithm proceed to a maximum likelihood estimation of the Poisson distribution parameters using an expectation-maximization (EM) framework. Given a set of $K$ models, the likelihoods of each region given each class is computed. A posterior probability of each class given each region can, in turn, be computed. These probabilities can be interpreted as a soft clustering. The parameters of the classes are updated using a weighted aggregation of the signal. Since each region is computed a probability to belong to each class, it participates to the update of all the classes, with different weights.
-
-If the length of the chromatin signature searched $L'<L$, then the algorithm slides a window along the regions and searched for this signature at each possible offset. This is how it deals with alignment issue. The signal orientation issue is tackled by also performing a searched with the flipped model. The procedure is depicted in Figure \ref{atac_seq_em}A.
-
-\subsection{EMSequence : an algorithm to identify over-represented sequences}
-
 The ChIPPartitioning algorithm presents an interesting feature : it explicitly models the sequencing signal. Thus it can be adapted to search for different types of signals, for instance nucleosome architectures or footprints. But because footprints reflect the binding of TFs, it is also critical to be able to identify the motifs within. To this end, I modified ChIPPartitioning in order to discover over-represented sequence motifs. Let us call this new algorithm EMSequence. The following modifications have been applied to ChIPPartitioning : i) how the class signal is modeled, ii) the way the data likelihoods are computed and iii) the update of the class models. This is illustrated in Figure \ref{atac_seq_em}B.
 
 The input is composed of a matrix $D$ of integers of dimensions $N \times L$ containing $N$ DNA sequences $d_{1}, d_{2}, ..., d_{N}$ of length $L$. Each sample $d_{i}=(d_{i1}, d_{i2}, ..., d_{iL})$ is a vector of integers encoding the DNA sequence (A=1, C=2, G=3, T=4).
 
 The $K$ class profiles from which the data originate, instead of being modeled as signal profiles, are modeled as sequence motifs $M_{1}, M_{2}, ..., M_{K}$ of expected base probabilities. A class motif $M_{j}$ is a matrix of dimensions $4 \times L'$ with the constraint $\sum_{b=1}^{4} m_{j b,l} = 1$ for each position $l$.
 
 \subsubsection{without shift and flip}
 
 For the case where $L'=L$, the original equation (1) from \citep{nair_probabilistic_2014} to compute the probability of a sequence $d_{i}$ given a class $M_{j}$ is replaced by :
 
 \begin{equation}
 \begin{aligned}
 	P(d_{i}|M_{j}) & =  \prod_{l=1}^{L} m_{j b,l} \\
 	\text{where } b & = d_{il}
 \end{aligned}
 \label{atac_seq_emseq_likelihood}
 \end{equation}
 
 Once the posterior probabilities $P(M_{j} | d_{i})$ have been computed, the original equation (3) in \citep{nair_probabilistic_2014}, to update the class models, is modified as follows :
 
 \begin{equation}
 \begin{aligned}
 	m^{*}_{j b,l} & = \frac{\sum_{i=1}^{N} P(M_{j} | d_{i}) \times z}
 	                       {\sum_{i=1}^{N} P(M_{j} | d_{i})} \\
 	\text{with } z & =
 	\begin{cases}
     	1, & \text{if $b = d_{il}$}.\\
    		0, & \text{otherwise}.
 	\end{cases}
 \label{atac_seq_emseq_update_model}
 \end{aligned}
 \end{equation}
 
 where $b$ takes the values $1,2,3,4$ for A, C, G and T respectively.
 
 \subsubsection{with shift and flip}
 
 For the sake of generality, I present the case with shift and flip because the cases with shift only or flip only are special cases of it.
 
 For the case with shifting ($L'<L$) and flipping, the original equation (9) from \citep{nair_probabilistic_2014} to compute the probability of a sub-sequence of length $L'$ starting at offset $s$ in sequence $d_{i}$ given class $M_{j}$ is replaced by :
 
 \begin{equation}
 \begin{aligned}
 	P(d_{i} | M_{j}^{inv}, s) & = \prod_{l=1}^{L'} m_{j b,l}^{inv}\\
 	\text{with } b & =
 	\begin{cases}
     		d_{i,s+l-1}         & \text{if $inv = 1$}.\\
    			4 - d_{i,s+l-1} + 1 & \text{if $inv = 2$}.
 	\end{cases} \\	
 \end{aligned}
 \label{atac_seq_emseq_likelihood_shift_flip}
 \end{equation}
 
 where $inv$ is a notation indicating the orientation. If $inv=1$ we are searching in forward orientation (the forward strand) and $M_{j}^{1} = M_{j}$. If $inv=2$, we are searching in flipped orientation (reverse strand) and $M_{j}^{2}$ is the reverse complement motif of $M_{j}$.
 
 Computing the reverse complement motif $M_{j}^{2}$ of a class motif $M_{j}$ is done using :
 \begin{equation}
 	m_{j i,l}^{2} = m_{j 4-i+1, L'-l+1}
 \label{atac_seq_emseq_reverse_motif}
 \end{equation}
 
 The computation of the posterior probabilities $P(M_{j}, s, inv | d_{i})$ remains the same as in \citep{nair_probabilistic_2014}. With the posterior probabilities, the model update can be undertaken. The original equation (12) in \citep{nair_probabilistic_2014} should be modified. The update of the model is made in two steps, by creating an intermediate motif for each strand separately and then by combining them, as follows :
 
 \begin{equation}
 \begin{aligned}
 	m^{*1}_{j b,l} & = \sum_{s=1}^{S} \sum_{i=1}^{N} P(M_{j}, s, inv=1 | d_{i}) \times z^{1} \\
 	\text{with } z^{1} & =
 	\begin{cases}
     	1, & \text{if $b = d_{i,l+s-1}$}.\\
    		0, & \text{otherwise}.
 	\end{cases} \\
 	m^{*2}_{j b,l} & = \sum_{s=1}^{S} \sum_{i=1}^{N} P(M_{j}, s, inv=2 | d_{i}) \times z^{2} \\
 	\text{with } z^{2} & =
 	\begin{cases}
     	1, & \text{if $4 - b + 1 = d_{i,l+s-1}$}.\\
    		0, & \text{otherwise}.
 	\end{cases} \\
 	m^{*}_{j b,l} & = \frac {m^{*1}_{j b,l}} {\sum_{b'=1}^{4} m^{*1}_{j b',l}} +
 	                \frac {m^{*2}_{j 4-b+1,L'-l+1}} {\sum_{b'=1}^{4} m^{*2}_{j 4-b'+1,L'-l+1}}
 \label{atac_seq_emseq_update_model_shift_flip}
 \end{aligned}
 \end{equation}
 
 where $m^{*1}$ is the partial motif for the forward strand, $m^{*2}$ is the partial motif for the reverse strand, $S = L - L' + 1$ is the number of possible shift states and $b$ takes the values $1,2,3,4$ for A, C, G and T respectively.
 
 As in the original algorithm, the optimization process is then carried on for a given number of iterations.
 
 
 \subsection{EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}
 
 For completeness, I also describe a generalized algorithm that performs a classification of a set of regions using several different signal layers over these regions, at the same time.
 
 Because ChIPPartitioning and EMSequence algorithm computations are strictly identical with the exception of the likelihood computations and the model update, it is possible to design a third algorithm, called EMJoint, that models at the same time one or more read coverage signals over a region and its sequence composition. To do so, I simply mixed both previous algorithms and applied the following modifications. For the sake of simplicity, I only expose the version with shift and flip, for one read coverage signal and the DNA sequence, as it is more general.
 
 The input is composed of two matrices of integers, $D$ and $R$, of dimensions $N \times L$. They contain $N$ DNA sequences $d_{1}, d_{2}, ..., d_{N}$ of length $L$ and $N$ vectors of read counts $r_{1}, r_{2}, ..., r_{N}$ of length $L$, respectively. Each DNA sequence $d_{i}=(d_{i1}, d_{i2}, ..., d_{iL})$ is a vector of integers encoding the DNA sequence (A=1, C=2, G=3, T=4) and each read count vector $r_{i}=(r_{i1}, r_{i2}, ..., r_{iL})$ is a vector of integers containing the number of reads mapping over the sequences contained in $D$.
 
 Each class is modeled by a vector of length $L'$ of expected number of reads $C_{j} = (c_{j1}, c_{j2}, ..., c_{jL'})$ and by a sequence motif $M_{j}$ of expected base probabilities of dimensions $4 \times L'$ with the constraint $\sum_{b=1}^{4} m_{j b,l} = 1$ for each position $l$.
 
 To compute the likelihood $P(r_{i}, d_{i}, s, inv |C^{inv}_{j}, M^{inv}_{j})$ of a region, equation \ref{atac_seq_emseq_likelihood} is modified as follows :
 
 \begin{equation}
 \begin{aligned}
 	P(r_{i}, d_{i}, s, inv |C^{inv}_{j}, M^{inv}_{j}) & = 
 	\prod_{l=1}^{L'} Poisson(r_{i,s+l-1}, \lambda=c^{inv}_{j,l}) 	
 	\times 
 	m^{inv}_{j b,l}\\
 	\text{with } b & =
 	\begin{cases}
     		d_{i,s+l-1}         & \text{if $inv = 1$}.\\
    			4 - d_{i,s+l-1} + 1 & \text{if $inv = 2$}.
 	\end{cases} \\	
 \end{aligned}
 \label{atac_seq_emjoint_likelihood}
 \end{equation}
 
 where $\lambda$ is the mean parameter of the $Poisson$ probability mass function.
 
 The computation of the posterior probability $P(C_{j}, M_{j} | r_{i}, d_{i})$ remains unchanged. Once these values have been computed, it is possible to update both parts $C_{j}$ and $M_{j}$ of a class using the original equation (11) from \citep{nair_probabilistic_2014} and equation \ref{atac_seq_emseq_update_model_shift_flip} respectively.
 
 It is possible to further generalize this algorithm in order for it to take $Z$ different input matrices (layers) of dimensions $N \times L$ containing different types of signal (for instance DNA sequences, TF$_{1}$ ChIP-seq, TF$_{2}$ ChIP-seq, DNase-seq, ...) for a set of $N$ regions.
 
 This only requires to adapt how the classes are modeled and equation \ref{atac_seq_emjoint_likelihood} to sum over the $Z$ different layers instead of only two. Additionally, care should be taken to use equation \ref{atac_seq_emseq_likelihood_shift_flip} for DNA sequence layer and equation (6) from \citep{nair_probabilistic_2014} for read count layers.
 
 \subsection{Data realignment}
 % All of the above described algorithms compute a set of posterior probabilities and use them to perform the class model update. As illustrated in Figure \ref{atac_seq_em}, this procedure is actually a weighted and ungaped data alignment in which the posterior probabilities are the weights.
 
 % It is absolutely feasible to run a partitioning on a given matrix $A$, for instance ATAC-seq read counts, using EMRead, and to subsequently use the obtained posterior probabilities to compute the class models, using another data matrix, let us say $B$ of DNA sequence. In this case, equation \ref{atac_seq_emseq_update_model_shift_flip} should be used  because $B$ contains DNA sequences.
 
 % This procedure allows to realign dataset $B$ as $A$ in order to co-visualize different types of signals. The only things that should be taken care of is that matrices $A$ and $B$ should have the same dimensions.
 
 As for ChIPPartitioning, these algorithms compute a set of posterior probabilities and use them to perform the class model update. Thus, each one of them can be used to partition a dataset $A$ and realign another dataset $B$, using the same procedure as described in section \ref{encode_peaks_data_realign} for ChIPPartitioning.
 
 Furthermore, it is absolutely feasible to run a partitioning on a given matrix $A$, let us say of DNA sequences, using EMSequence, and to subsequently use the obtained posterior probabilities to compute the class models, using another data matrix $B$ of ATAC-seq read counts. Care should only be taken to use the appropriate data model computation equation.
 
 In the following sections, this is the procedure that will be used to overlay different types of data for a given partition.
 
 
 \section{Results}
 
 Prior to undertaking the chromatin organization study, several pre-processing steps and checks had to be performed in order to ensure a proper treatment of the data. 
 
 \subsection{Fragment size analysis}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.3]{images/ch_atac-seq/fragment_lengths.png}
 	\captionof{figure}{\textbf{Fragment size analysis} \textbf{A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line shows the sum of the three classes. \textbf{B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicate, for each class, the size limit at which the class probability drops below 0.9. With these limits, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf{C :} final fragment classes. Each fragment whose size fell within the size range spanned by a class was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 million ambiguous and long fragments ($>$500bp) unassigned.}
 \label{atac_seq_fragment_size}
 \end{center}
 \end{figure}
 
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.4]{images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png}  
 	\captionof{figure}{\textbf{Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf{Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf{Middle row :} each position of the reads were used. \textbf{Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\
 The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).}
 \label{atac_seq_ctcf_all_data}
 \end{center}
 \end{figure}
 
 If a TF can protect a stretch of DNA against transposition and create a footprint, so can a nucleosome. However, both cases are biologically drastically different. Nucleosomes compete with TFs to bind DNA \citep{voss_dynamic_2014}. Thus nucleosome footprints represent regions of the genome that cannot be bound by TFs, with the exception of pioneering factors \citep{cirillo_opening_2002,zaret_pioneer_2011,soufi_pioneer_2015}. Mixing nucleosome and TF footprints could bias downstream analyses.
 
 Nucleosome fragments are expected to be large, as a nucleosome is wrapped by $\sim$150bp of DNA, whereas nucleosome free region fragments can be expected to be shorter. Long nucleosome free region fragments are unlikely. The longer an accessible region is, the more likely an insertion will happen, resulting in the creation of two shorter fragments. A fragment size analysis allowed to identify different categories of fragments (Figure \ref{atac_seq_fragment_size}). In this figure, open regions, mono- and di-nucleosome fragments are clearly visible. Moreover, a 10bp periodicity reflecting the DNA pitch is also visible. This pattern is expected and indicates a good data quality \citep{buenrostro_transposition_2013}.
 
 Rather than assigning arbitrary fragment size thresholds to separate the categories, I preferred to use the approach developed by \citep{buenrostro_transposition_2013}. The fragment sizes were fitted by a mixture of three Gaussian distributions. Then, the limits for each fragment class were defined as the sizes at which the probability of assignment to that fragment class dropped under 0.9 (Figure \ref{atac_seq_fragment_size}B).
 
 This method ensured the classification of 134 million fragments, leaving $\sim$46 million fragments unassigned (Figure \ref{atac_seq_fragment_size}C). However, this drastically reduces the risk of fragment mis-classification and protects the downstream analyses from a strong bias.
 
 
 \subsection{Measuring open chromatin and nucleosome occupancy}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.3]{images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png}  
 	\captionof{figure}{\textbf{Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility (red) as well as the nucleosome occupancy (blue) were measured around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.}
 \label{atac_seq_ctcf_sp1_myc_ebf1_footprint}
 \end{center}
 \end{figure}
 
 Once the different fragment populations have been identified, the next question to solve is how should each category of fragment be represented?
 
 First, for open chromatin fragment, it is clear that we want to know where the DNA is accessible. This information is provided by the fragment edges – the transpositions sites. However, to account for the fact that the Tn5 transposase acts as a homo-dimer and inserts two barcodes side by side \citep{adey_rapid_2010}, the fragment edges positions were modified by +4bp for reads mapping the + strand and -5bp for reads mapping the - strand, as done in other studies \citep{buenrostro_transposition_2013,li_identification_2019}. 
 
 Second, for mono and di-nucleosome fragments, we are interested in knowing where the nucleosomes are sitting. For this, the fragment edges may not be the most informative. A better way to represent those fragments would be to use the center positions, which should correspond to the dyad for mono-nucleosomes or even to consider the entire reads or fragments.
 
 To test these hypotheses I investigated the different signal aggregations around predicted CTCF binding sites. The signal, +/- 1kb around the motif, was aggregated inside bins of 1, 2 or 10bp size. CTCF predicted binding sites were good candidates because CTCF is known to bind mostly through its motif (\cite{neph_expansive_2012} and Figure \ref{encode_peaks_gm12878_motif_prop}). Additionally CTCF binding produces a really typical chromatin architecture with strongly positioned nucleosome arrays \citep{fu_insulator_2008} and leaves a footprint \citep{neph_expansive_2012}.
 
 As seen in Figure \ref{atac_seq_ctcf_all_data}, entire open chromatin reads and fragments do not allow to visualize a footprint signature (upper and middle rows, red lines). Both of them, nonetheless highlight open chromatin regions. The footprint becomes visible when considering the edges of the open chromatin fragments (bottom row, red line). Increasing the bin size blurs it and eventually makes it disappear (10bp, lower right).
 
 Regarding nucleosomes, considering the entire fragments blurs the signal (upper row, blue and green lines) and the entire reads reveal the region upstream and downstream of the nucleosomes (middle row, blue and green lines). The only way to obtain a precise nucleosomes occupancy information was to use the middle position of the mono-nucleosome fragments (bottom row, blue line). Interesting enough, the middle position of di-nucleosome fragments indicates the DNA linker between two adjacent nucleosomes but does not accumulate in open chromatin regions (bottom row, green line). This suggested that di-nucleosome fragments could be separated in two mono-nucleosome fragments. I tested this hypothesis by simply dividing a di-nucleosome fragment in two smaller ones, at its center position. I then pooled these new fragments with the mono-nucleosome fragments to create a nucleosome fragment dataset. When looking at the middle of these fragments, they could perfectly reveal the nucleosomes directly adjacent to the CTCF motif. Additionally this nucleosome dataset was also able to reveal a second nucleosome in the arrays (bottom row, violet lines). 
 
 To further support these results, I also measured the chromatin organization (+4/-5 corrected read edges for open chromatin and center of the nucleosome fragments from the nucleosome fragment dataset) around SP1, myc and EBF1 binding motifs. As shown in Figure \ref{atac_seq_ctcf_sp1_myc_ebf1_footprint}, the aggregation of the signal around the CTCF and SP1 motifs shows an enhanced accessibility on the motif as well as a clear footprint. Moreover, the footprint is in a nucleosome free region. The situation was different for myc and EBF1. Neither of the two aggregations showed a nucleosome free region, nor an increased accessibility around the motif. Regarding myc, even though its aggregation presented a signal compatible with a local protection of its motif, this was shallow in comparison to CTCF and SP1. Finally, EBF1 presented a somewhat decreased accessibility around its motif and a strikingly increased accessibility directly at the level of the motif.
 
 CTCF and SP1 motifs are supporting the fact that footprints and nucleosome occupancy can be revealed using this method. Together with myc and EBF1, they clearly show an heterogeneity of chromatin organizations, at least at the aggregation level.
 
 There are many possible explanations for these results. One of them is that the aggregation hides the variability of the individual regions and that SP1 and CTCF present a more conserved organization around their motif than myc and EBF1. Another would be that the most visible and obvious footprints reflect a stronger TF activity. However, one should remain cautious on the interpretation of aggregation patterns as the individual site signals may interfere with each other, creating an artificial aggregation pattern that does not exist at any individual site \citep{kundaje_ubiquitous_2012}.
 
 In the light of these results, I decided to use the +4/-5 corrected edges of the open chromatin reads to investigate footprints and the fragment centers of the newly created nucleosome dataset to investigate nucleosome occupancy. If not explicitly stated otherwise, the reader should consider that any signal measurement is performed using this procedure.
 
 \subsection{Evaluation of EMSequence and ChIPPartitioning}
 
 Before continuing further in the analysis of the data, it was important to assess the performances of these partitioning methods to discover sequence motifs and footprint classes.
 
 % To evaluate the behavior of each algorithm, a simple situation was considered. As in the previous section, a list of predicted CTCF, SP1, EBF1 and myc binding sites were compiled using a PWM genome scan. For each TF, the open chromatin read density, the nucleosome occupancy and the DNA sequences were extracted.
 
 %  This case can be considered simple for two reasons. First, for a given TF, all the regions have a motif instance. Second, this motif instance is located exactly in the center of the region, in the same orientation. Thus no shifting nor flipping is required in order to reveal footprints. The only necessary thing may be to discover different types of footprints.
 
 
 \subsubsection{EMSequence}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.4]{images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png}  
 	\captionof{figure}{\textbf{Classification performances on simulated data :} \textbf{Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the area under the ROC curve (AUC). The red line indicates the AUC value achieved by the true motifs. \textbf{Right} the 50 ROC curves corresponding to each partition. The red line indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.}
 \label{atac_seq_emseq_auc_roc}
 \end{center}
 \end{figure}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.35]{images/ch_atac-seq/sp1_motifs_7class.png}  
 	\captionof{figure}{\textbf{SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop the motifs indicate tandem arrangements of SP1 motifs.}
 \label{atac_seq_emseq_sp1_10class}
 \end{center}
 \end{figure}
 
 In order to measure the ability of EMSequence to retrieve over-represented motifs from a set of sequences, I simulated 2'000 synthetic DNA sequences of 100bp long. The sequences were separated in two classes. Each class was defined by a 8bp sequence motif (Figure \ref{suppl_atac_seq_emseq_best_motifs}). Each sequence had exactly one motif occurrence, anywhere in the sequence (with a uniform probability), on either strand (equiprobable).
 
 These sequences were partitioned with flipping into 2 classes by EMSequence in order to find 2 motifs of 11bp ($100bp - 11bp + 1 = 90$bp of shifting). The optimization was run for 200 iterations. To assess the quality of the motifs discovered, I set up a classification framework inspired by PWMEval-ChIP-peak (see section \ref{section_smileseq_pwmeval}). Using equation \ref{smile_seq_pwmeval_score}, each sequence was scored with both models of each partition and the area under the curve (AUC) of the receiver operating characteristic (ROC) curve was computed for each partition. The same was done using the true motif models. Because EMSequence is sensitive to its initial state, 50 partitions were performed. As shown in Figure \ref{atac_seq_emseq_auc_roc}, the de-novo discovered models are as good as the actual sequence motifs to segregate both sequence classes. Additionally, a visual inspection of the discovered motif logos confirmed that most of the discovered motifs actually match the true sequence motifs (Figure \ref{suppl_atac_seq_emseq_best_motifs}).
 
 In order to further demonstrate the ability of EMSequence on a more significant biological case, I investigated SP1 sequence specificity.  As for ChIPPartitioning, a list of 15'883 predicted SP1 binding sites was compiled using a PWM genome scan. The sequences +/- 400bp around the motif instance centers were extracted. Thus, all regions contained at least one SP1 site and retrieving the SP1 binding site is expected. Additionally, as SP1 tends to bind to promoters [REFERENCE], we cannot exclude seeing other motifs being discovered. These sequences were then given to EMSequence to search for several different 31bp long motifs ($801 - 31 + 1 = 771$ of shifting freedom). The optimization was run for 20 iterations.
 
 The motifs that were retrieved matched the expectations (Figure \ref{atac_seq_emseq_sp1_10class}). All classes retrieved an SP1 motif. Four classes (1,5,6,7) retrieved a single SP1 motif. Even though they are highly similar, they vary in terms of flanking regions (class 6 versus 7 for instance). Class 3, which contained a surprisingly long motif, representing 24\% of the data, actually captured a LINE element. Indeed, the sequence "GCAGCGAGGCTGGGGGAGGGGC" is characteristic of it (determined using BLAT \citep{kent_blatblast-like_2002} on the UCSC Genome Browser). Finally, and more interestingly, classes 2 and 4 could capture two rare (about 1\% of the cases each) tandem repeats of SP1 motifs with two different spacers (1 and 9bp). Additionally, head-to-head SP1 motif repeats could be detected (Figure \ref{suppl_emseq_sp1_10class}, classes 1 and 4). This suggested that SP1 binds as i) a homo-dimer or ii) as a hetero-dimer with another member of its family, binding a resembling motif. Moreover, the tandem and head-to-head motif repeats suggested that different structural arrangements exist. According to BioGrid \citep{chatr-aryamontri_biogrid_2017}, SP1 has been reported to physically interact with SP1 (homo-dimer), SP3 and SP4 (hetero-dimer). According to JASPAR 2018 matrix clustering \citep{castro-mondragon_rsat_2017}, the KLF and EGR families recognize similar motifs. Members of both families are also listed as SP1 interactors in BioGrid (KLF4, KLF6, KLF9, KLF10 and EGR1).
 
 The lack of non-SP1 motif discovered could be explained by at least one reason. The list of SP1 binding sites compiled was performed using a quite stringent threshold. The consequence is that the motif instances are highly similar to each other. This makes SP1 motifs strongly dominant within the dataset. Since EMSequence optimizes a set of models, it is highly sensitive to its starting state. In this experiment, EMSequence was initialised randomly. Because of the dominance of SP1 motifs within the data, it is likely that the different classes were attracted by them rather than allowed to diverge to detect other motifs.
 
 Together, these evidences support the fact that EMSequence is suited to perform a meaningful partition of DNA sequences and to retrieve biologically important DNA motifs.
 
 
 \subsubsection{ChIPPartitioning}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.30]{images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png}  
 	\captionof{figure}{\textbf{Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifting but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signals around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.}
 \label{atac_seq_emread_ctcf_noshift_flip}
 \end{center}
 \end{figure}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.35]{images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png}  
 	\captionof{figure}{\textbf{Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifting and with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signals around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.}
 \label{atac_seq_emread_ctcf_shift_flip}
 \end{center}
 \end{figure}
 
 A complete benchmark of ChIPPartitioning has been performed in \citep{nair_probabilistic_2014}. In this paper, the authors generated simulated ChIP-seq data with patterns to retrieve, at different coverages, and compared the performances with other similar software. It turned out that ChIPPartitioning was the best performing method. For this reason, I did not repeat this benchmark. However, the ability of ChIPPartitioning to retrieve footprint classes from ATAC-seq data has not been assessed yet.
 
 To evaluate this, a simple situation was considered. As in the previous section, lists of predicted CTCF and SP1 binding sites were compiled using a genome scan with suited binding models. For each TF, the open chromatin read density around these sites was measured +/-400bp around the motif instances, at the single base pair resolution, and classified. As the motif instances were already aligned in the center of the regions, no shifting was used. However, the region orientations were not corrected based on the strand on which the motif instance appeared.
 
 To evaluate the capability of ChIPPartitioning to retrieve classes of footprints, these data were classified
 i) without shifting and with flip (Figure \ref{atac_seq_emread_ctcf_noshift_flip} and Figure \ref{suppl_emread_sp1_noshift_flip}) and ii) with shifting and flipping (Figure \ref{atac_seq_emread_ctcf_shift_flip} and \ref{suppl_emread_sp1_shift_flip}). 
 
 First, in both conditions - with and without shifting - different open chromatin signal classes have been discovered. Second, in most cases, the chromatin accessibility is anti-correlated with the nucleosome occupancy, which is something expected. However this is not always the case, such as in Figure \ref{atac_seq_emread_ctcf_noshift_flip} classes 3 and 6. Such patterns may reflect a complex chromatin architecture, with variably positioned nucleosomes, that the partition cannot realign. But it is also likely to be an artifactual signal caused by the partition itself. Third, allowing the regions to be flipped based on the chromatin accessibility signature (Figure \ref{atac_seq_emread_ctcf_noshift_flip} and Figure \ref{suppl_emread_sp1_noshift_flip}) does not allow to resolve properly the orientation of the underlying CTCF and SP1 motif instances. Indeed, the sequence logos, in the center, are symmetric, indicating a superposition of +strand and -strand motifs. Finally, allowing a moderate shifting freedom (+/- 10bp, Figure \ref{atac_seq_emread_ctcf_shift_flip} and \ref{suppl_emread_sp1_shift_flip}) results in blurred out sequence logos. This indicates that the chromatin accessibility signal realignment unphased the underlying CTCF and SP1 motif instances. Thus the signal that is observed does not represent classes of footprints.  
 
 In this case, each region contained a motif instance at its center. Nonetheless, even a limited shifting according to the open chromatin signal resulted in the dephasing of the underlying motif instances. Trying to resolve the motif orientation by allowing flipping according to the open chromatin was not more successful. Thus, discovering footprint classes from a highly unaligned set of regions does not seem to be possible. Workaround strategies have to be found.
 
 
 \section{Aligning the binding sites}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.30]{images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png}  
 	\captionof{figure}{\textbf{Central parts of the extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.}
 \label{atac_seq_23class}
 \end{center}
 \end{figure}
 
 To create a catalog of chromatin architectures around TF binding sites in monocytes, it is necessary to be able to align the regions of interest properly (with respect to the binding sites) or to have methods able to deal with this issue. 
 
 The list of active regulatory regions was assumed to correspond to regions of high ATAC-seq signal. Consequently, I chose to use the peak list generated by 10xGenomics for this dataset as the list of regulatory regions of interest. 
 
 Because ChIPPartitioning did not seem to be able to realign regions containing regulatory elements based on their chromatin accessibility profiles, I decided to use EMSequence to i) find TF binding sites in each region and ii) align the regions based on the occurrence of these TFs. Because EMSequence creates a probabilistic alignment, it can deal with the fact that each region can bear several binding sites from different TFs. Finally, a de-novo motif discovery was not needed strictly speaking. Instead, I restricted the analysis to a priori important TFs using a single iteration of EMSequence.
 
 A handful of important TFs in monocytes \citep{kurotaki_transcriptional_2017,rico_comparative_2017} were selected : jun, HIF1a, myc, PU.1, CEBPB, IRF2, IRF4, IRF8, LHX3, FOXH1, SOX, MEF2c, ELF5, STAT6, NFE2, AHR, E2F2, E2F3, KLF2, KLF4 and NR4A1. Additional TFs and TF families were selected to widen spectrum of TF families included in the analysis : CTCF, the EGR family, the GATA family, the NFAT family and the RUNX family.
 
 Because TFs within a given family tend to bind the same motif (for instance IRF4 and IRF8 or E2F2 and E2F3), binding models representative for sets of TFs were selected from the JASPAR database motif clustering \citep{castro-mondragon_rsat_2017}. In total, 23 binding models were used to initialize as many classes for EMSequence to discover. EMSequence was run for one iteration to classify 70'462 sequences of 1001bp centered on the ATAC-seq peaks, with flipping and 971 of shifting freedom (thus searching motifs of 30bp). Based on the alignment and the data, the resulting 30bp sequence and ATAC-seq read density models were then extended by 500bp on each side to reveal the organization of regulatory sequences (Figures \ref{suppl_atac_seq_23class} and \ref{atac_seq_23class}). 
 
 First, from the class aggregations, footprints are clearly visible over the TF binding motifs. This strongly suggests that the region realignment worked properly. Second, the 23 different classes showed various types of footprints. For instance, CTCF shows its usual strongly positioned nucleosome arrays together with a clear chromatin opening over the motif supporting CTCF binding. The important monocyte TF PU.1 also shows an increased chromatin accessibility at its binding sites. However, the footprint drastically differs from CTCF in the sense that a clear and wide signal drop - larger than the PU.1 motif only - is visible. It is also concomitant with an increased nucleosome occupancy. Conversely, LHX3 shows a pattern that rather suggests a modest chromatin opening. Finally, the KLF family binding sites show a strong chromatin accessibility rather than a protection of the bound sequences, which may be compatible with a transcriptional repressive activity \citep{berest_quantification_2018}.
 
 Third, the overall class probabilities give an indication of the regulatory element content in terms of motifs. It seems that the CTCF motif is the most common one, even though it does not mean that each motif instance is bound or even functional.
 
 
 \section{Exploring individual TF classes}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.30]{images/ch_atac-seq/data_classCTCF_8class.png}  
 	\captionof{figure}{\textbf{CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.}
 \label{atac_seq_ctcf_subclass}
 \end{center}
 \end{figure}
 
 The results shown in the previous section are per TF aggregation profiles. Thus a further exploration of each class is required to investigate whether several different footprint classes can be isolated per TF. To do so, I extracted the data assigned to each class and ran ChIPPartitioning on these data. Because the regions have already been aligned properly to have the TF binding motif in their center, ChIPPartitioning was not allowed shifting nor flipping.
 
 As expected, applying this method on the CTCF, PU.1 and AP1 classes refined the results. For instance, the CTCF class data classification (Figure \ref{atac_seq_ctcf_subclass}) showed sub-classes in which CTCF motif instances were likely not bound (classes 8, 6, 7, 2 and 5) as well as sub-classes in which they were likely bound (classes 4, 3 and 1). In the latter group, several chromatin organizations could be revealed, with approximately 35\% of the motif instances showing the canonical CTCF chromatin organization (class 4).
 
 The same is illustrated for PU.1 and AP1 classes (Figure \ref{suppl_atac_seq_pu1_subclass} and \ref{suppl_atac_seq_ap1_subclass}). In both cases, it was possible to identify bound and unbound motif instance sub-classes. Also, for these two TFs, the nucleosome are not visible, in line with my previous results showing that only CTCF has nucleosome arrays organized with respect to its binding sites (see Chapter \ref{encode_peaks}).
 
 \section{Discussions}
 
 Even though preliminary, these results showed that this computational framework can be useful to analyse the chromatin organization around TF binding sites using ATAC-seq data.
 
 First, not much of a surprise, applying population level analyses to the pool of single cell data gave meaningful results.
 
 Second, ChIPPartitioning turned out to be useless to properly phase unaligned regions based on their chromatin accessibility patterns. Instead, the newly proposed EMSequence algorithm turned out to be usable for this task, in a special setting, and was able to produce a meaningful per TF data realignment.  As a reminder, short models were searched (and thus large shifting freedom was set). This alignment was then used to realign larger regions and revealed footprints. Also, a priori knowledge was fed in the form of the initial sequence models values taken from TF binding model databases.
 
 Third, I presented a method to extract data assigned to a class, from a probabilistic partition, without using any hard assignment shortcut. Running ChIPPartitioning on these data then revealed different chromatin organizations for each TF, allowing to distinguish between likely bound and unbound motif instances.
 
 \section{Perspectives}
 
 As a fact, in its current form, this framework is incomplete. For instance, a method to estimate the fit of a given partition and choose the best one is required. This would help choosing the appropriate number of classes to search. This could be implemented using the Akaike information criterion \citep{marsland_machine_2015-1}.
 
 So far, most of the single cell technologies are targeted at measuring gene expression through scRNA-seq. Cell sub-population detection by clustering expression matrix \citep{fan_characterizing_2016, kiselev_sc3:_2017}, using gene regulatory network reconstruction \citep{aibar_scenic:_2017} or by identifying cellular states based on the accessible region motif content \citep{gonzalez-blas_cistopic:_2019} are popular. Currently, the use of ATAC-seq data at the single cell level is mostly limited to a binary open/closed classification.
 
 One can imagine using the above described framework to draw a catalog of chromatin structures from the pooled data and use it to annotate each cell. More precisely this could be done by going back to each peak in each cell and assigning a qualitative label corresponding to the chromatin model that matches the best this region in this cell. Ultimately, this would lead to the creation of a matrix (cells x regions) that could be used to run clustering methods.
 
 How the similarity should be computed and whether each cell will have a high enough coverage for similarity computations to be meaningful remain open questions. Alternatively, one can imagine replacing single cell by different bulk experiments. In this case, the clustering would not isolate cell sub-populations but experiments (individuals, culture conditions, etc) that are similar to each other.
 
 \section{Methods}
 
 \subsection{Implementations}
 
 In order to allow an easy handling and a quick treatment of the data, the algorithms and procedures described above have been implemented in C++ and fully multi-threaded.
 Here is a list of the relevant C++ implementations :
 
 \begin{itemize}
 \item EMRead : implementation of the EMRead algorithm. Takes a read count data matrix as input, the number of classes, the shifting and flipping parameters and return the posterior probabilities. 
 
 \item EMSequence : implementation of the EMSequence algorithm. It takes a DNA sequence data matrix as input, the number of classes, the shifting and flipping parameters and return the posterior probabilities. 
 
 % \item EMJoint : implementation of the generalized EMJoint algorithm. It takes any number of data matrix as input, the number of classes, the shifting and flipping parameters and return the posterior probabilities. This program can be given 0 or 1 DNA sequence matrix and any number of read count matrices as input.
 
 \item ProbToModel : implementation of the data realignment procedure. It takes a data matrix (DNA sequence or read counts) and some posterior probabilities as input and returns the class models.
 \end{itemize}
 
 \subsection{Fragment classes}
 The distribution of size was modeled as a mixture of three classes, each following a Gaussian distribution. Each class fragment length distribution was modeled using :
 
 \begin{equation}
 \begin{aligned}
 	f(x) & = a  \times \exp\left(\frac{-(x-m)^{2}}{2 \times s^{2}}\right)
 \end{aligned}
 \label{atac_seq_fragment_length_class}
 \end{equation}
 
 where $x$ is the fragment length, $m$ the mean fragment length for this class, $s$ the fragment length standard deviation and $a$ an amplitude factor.
 
 The mean parameters were initialized to 50, 200 and 300bp. The standard deviation parameters were initialized to 10, 10 and 30bp and the amplitude factors to 1. The parameters were fitted to the data using the nls() function in R implementing the Gauss-Newton algorithm.
 
 \subsection{Simulated sequences}
 TODO
 % I simulated 2'000 synthetic DNA sequences of 100bp long. The sequences were separated in two classes. Each class was defined by a 8bp sequence motif (Figure \ref{suppl_atac_seq_emseq_best_motifs}). Each sequence had exactly one motif occurrence, anywhere in the sequence (with a uniform probability), on either strand (equiprobable). The motif sequence was sampled using the corresponding class model. Finally, the bases outside the sequence were sampled using a mono-nucleotide model with 0.25 probability for each base.
 
 \subsection{Realignment using JASPAR motifs}
-TODO
+23 binding models were downloaded from the motif clustering of JASPAR \citep{castro-mondragon_rsat_2017}. Briefly, the motif clustering is made of a forest of trees (each tree is a cluster) in which the leaves are the individual TF binding models (motifs). Binding models for the internal nodes are also available. As a matter of fact, they represent a consensus over multiple individual TF binding models. In order to i) have models representing the binding specificity of the TFs of interest and ii) widen the analysis to other TFs if they were sufficiently related to one of the TFs of interest in terms of specificity, I manually selected binding motifs, in the different motif trees, that would fit these requirements.
 
-\subsection{Display of motif logo}
-TODO
+The downloaded models were :
+
+\begin{table}
+\begin{center}
+	\begin{tabular}{ |l|l|p{90mm}|l| }
+	\hline
+  	\multicolumn{4}{|c|}{Binding models downloaded} \\
+  	\hline
+  	Cluster ID & Node ID & TFs covered & Name \\
+  	\hline
+	 1 & 74    & ARID3b, LHX3                             & LHX3 \\
+	 2 & 12    & ESRRG, NR4A1, ESRRB, NR2F2               & NR4A1 \\
+	 3 & 23    & FOSL1::JUNB, FOSL1::JUN, FOS::JUND, \newline FOSL2::JUN, FOS::JUNB, JDP2, NFE2, FOSL1, FOS, JUND, FOSL2, JUNB, JUN::JUNB, FOSL1::JUND, FOS::JUN, FOSL2::JUND, FOSB::JUNB, FOSL2::JUNB, BATF::JUN, JUN & AP1 \\
+	 3 & 24    & NFE2L2, BACH1::MAFK, MAF::NFE2, BACH2    & NFE2 \\
+	 4 & 22    & max::myc, MXI1, myc, mycn                & myc \\
+	 4 & 30    & ARNT, AHR::ARNT                          & AHR \\
+	 4 & 31    & HIF1A, HES5, HES7                        & HIF1A \\
+	 5 & 20    & CEBPA, CEBPG, CEBPD, CEBPB, CEBPE        & CEBPB \\
+	 7 & 13    & SPIC, SPI1                               & PU.1 \\
+	 7 & 17    & ELF5, ELF3, EHF, ELF1, ELF4              & ELF5 \\
+	19 & 2     & NFAT5,NFATC1,NFATC3                      & NFAT \\
+	20 & 4     & MEF2C,MEF2B,MEF2A,MEF2D                  & MEF2C \\
+	21 & 5     & GATA3, GATA5, GATA4, GATA6, GATA1, GATA2 & GATA \\
+	28 & 13    & EGR2, EGR4, EGR1, EGR3                   & EGR \\
+	28 & 14    & KLF4,KLF1,KLF9                           & KLF \\
+	31 & 4     & IRF7, IRF9, IRF4, IRF8, IRF5             & IRF4 \\
+	31 & 5     & STAT1::STAT2, IRF2                       & IRF2 \\
+	32 & STAT6 & STAT6                                    & STAT6 \\
+	33 & 1     & SOX3, SOX6                               & SOX3 \\
+	38 & 3     & RUNX1, RUNX2, RUNX3                      & RUNX \\
+	39 & 1     & E2F3, E2F2                               & E2F2 \\
+	48 & CTCF  & CTCF                                     & CTCF \\
+	66 & 1     & FOXH1                                    & FOXH1 \\
+	\hline
+	\end{tabular}
+	\captionof{table} { \textbf{TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url{http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TFs whose models are children of the given node. "Name" refers to the label by which this model is referred to in the text and figures.}
+\label{atac_seq_motif_table}
+\end{center}
+\end{table}
+
+Each binding model was downloaded in JASPAR format and then converted to a per-position probability letter matrix. 
 
 \subsection{Model extension}
 TODO
 
 \subsection{Extracting data assigned to a class}
 
 The difference between hard and soft clustering (such as ChIPPartitioning) methods is that in soft clustering "the output is a membership function, so each pattern can belong to more than one group with varying degrees of membership" \citep{dalton_clustering_2009} while in hard clustering each pattern is assigned to only one group. With hard clustering, isolating all regions assigned to a class $X$, creating a matrix of read density and re-running the clustering method on this matrix is straightforward and would do the trick. With soft clustering, this is also possible but requires accounting for the probabilistic membership described above.
 
 Let's assume that a first matrix $R$ of dimensions $NxL$ containing $N$ regions of length $L$ has been partitioned in $K$ classes by ChIPPartitioning, with shifting freedom $S<L$ and flip. This created a probability matrix $P$ of dimensions $NxKxSx2$ (region, class, shift, flip). Computing a new matrix $R^{class}$ to extract the signal of the class of interest $X$ is actually related to the model update performed by ChIPPartitioning (see Equation 11 from \cite{nair_probabilistic_2014}). However, instead of summing (aggregating) everything in a single vector containing the class model, we want to unfold it into a matrix.
 
 As a reminder, using a shifting freedom $S$ creates class models of length $L-S+1$. In other words, shifting makes the class model smaller than the matrix length.
 
 Let us construct an extended matrix of read densities $R'$ of dimensions $NxL'$ where $L'=L+S$. Constructing $R'$ is equivalent to adding $S/2$ columns on each side of $R$. Finally, to construct the final matrix $R^{class}$ of dimensions $NxL$ we need to use the modified "class model update" described in algorithm \ref{atac_seq_algo_extract_class}. Because some regions (rows) can be assigned with a really low probability to a given class, the corresponding rows in $R^{class}$ show a really low signal - often no signal at all. Consequently, the rows without signal were filtered out.
 
 \SetKwProg{Fn}{}{\{}{}\SetKwFunction{Function}{matrix ExtractClassData}%
 \begin{algorithm}[H]
 	\label{encode_peaks_algo_ndr_extend}
 	\Fn{\Function{}}
 	{	\KwData{The matrices $R'$ and $P$.}
 		\KwResult{The class matrix $R^{class}$}
 		
 		\tcp{overall class probabilies}
 		
 		$class.prob =$ vector of K 0's \;
 		$tot = 0$ \;
 		\For{$i from 1 to N$}		
 		{	\For{$j from 1 to K$}
 			{	\For{$k from 1 to S$}
 				{	\For{$l from 1 to 2$}
 					{	$class.prob_{j} \pluseq p_{i,j,k,l}$ \;
 						$tot \pluseq p_{i,j,k,l}$ \;
 					}
 				}			
 			}		
 		}
 		\For{$j from 1 to K$}
 		{	$class.prob_{j} \divideq tot$ \; }
 		
 		\tcp{modified class model update}
 		
 		\For{$i in 1 to N$}
 		{	\For{$s in 1 to S$}
 			{	
 				\tcp{forward orientation}
 				$from.dat2.fw = s $ \;
 				$to.dat2.fw = from.dat2.fw + L - 1$ \;
 				$j.dat3.fw = 1$ \;
 				\For{$j.dat.2.fw from from.dat2.fw to to.dat2.fw$}
 				{	
 					$R^{class}_{i,j.dat3.fw} \pluseq \frac{P_{i,X,s,1} \times R'_{i,j.dat.2.fw}} {class.prob_{X}}$ \;
 					$j.dat3.fw \pluseq 1$ \;
 				}
 								
 				\tcp{reverse orientation}
 				$j.dat3.fw = 1$ \;
 				$from.dat2.rv = L' - 1 -s $ \;
 				$to.dat2.rv = from.dat2.rv - (L-1) $ \;
 				\For{$j.dat.2.rv from from.dat2.rv down to to.dat2.rv$}
 				{	$R^{class}_{i,j.dat3.fw} \pluseq \frac{P_{i,X,s,2} \times R'_{i,j.dat.2.rv}} {class.prob_{X}} $ \;	
 					$j.dat3.fw \pluseq 1$ \;
 				}
 			}
 		}
 		\Return{$R^{class}$}
 	}
 	\caption{Computes a matrix containing the data assigned to a given class $X$.}
 	\label{atac_seq_algo_extract_class}
 \end{algorithm}
diff --git a/main/ch_encode_peaks.tex b/main/ch_encode_peaks.tex
index d5807cd..5293785 100644
--- a/main/ch_encode_peaks.tex
+++ b/main/ch_encode_peaks.tex
@@ -1,440 +1,440 @@
 \cleardoublepage
 \chapter{ENCODE peaks analysis}
 \label{encode_peaks}
 \markboth{ENCODE peaks analysis}{ENCODE peaks analysis}
 \addcontentsline{toc}{chapter}{ENCODE peaks analysis}
 
 % Modeling a TF sequence specificity only allows to partially understand how a TF binds a region. Indeed, scanning a genome using a PWM for putative binding sites often returns tens of thousands of sites with only a subset of them being really occupied within a cell. Other elements such as chromatin organization and composition are likely to drive TF binding. Thus gaining a better understanding about the chromat
 
 % The exact mechanisms at play remain unclear but nucleosome occupancy is thought to shelter DNA sequence - as some bases are facing the core octamer or to distort the DNA structure - impeding sequence recognition by TFs. In vivo, evidences for competition between TFs and nucleosomes have been collected. Computational simulations accounting for simultaneous multiple factor binding on DNA suggested that nucleosome occupancy and TFs binding influence each other and that TF binds nucleosome depleted regions \cite{wasson_ensemble_2009}.
 
 As discussed above, the organization of chromatin has a deep impact on TF binding. Nucleosomes and TFs are in competition to bind DNA. Because TFs are ultimate forces driving gene expression, understanding how chromatin influences them, or at least how chromatin is organized around them, is crucial.
 
 It is now clear that nucleosome occupancy fulfills more than a packaging role. It can also act as a barrier to impede DNA reading processes and compete with TFs for sequence occupancy.
 
 Thus gaining a better understanding of how chromatin is organized around TF binding sites is crucial to understand TF binding beyond their sequence specificity only.
 
 In an effort to better understand how the genome is organized and how its functions are fulfilled, 
 the ENCODE Consortium released an impressive collection of coherent data representing an unprecedented picture of the chromatin in human cell lines. 
 
 The GM12878 cells were chosen as one of the highest priority cell lines. GM12878 are widely-used lymphoblastoids. Because of their ability to divide and of their normal karyotype - unlike HeLa cells - these cells are a good model for genomic studies.
 
 \section{Data}
 % number of peaks per dataset
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.3]{images/ch_encode_peaks/peaklist_peaknumber_GM12878.png}  
 	\captionof{figure}{\textbf{Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. The horizontal dashed lines indicate 20'000 and 40'000.}
 \label{encode_peaks_gm12878_peak_number}
 \end{center}
 \end{figure}
 
 % proportion of peaks with motif per dataset
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.3]{images/ch_encode_peaks/peaklist_proportions_GM12878.png}  
 	\captionof{figure}{\textbf{Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep{mathelier_jaspar_2014}, HOCOMOCO v10 \citep{kulakovskiy_hocomoco:_2016} or Jolma \citep{jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. The horizontal dashed line indicates 0.5.}
 \label{encode_peaks_gm12878_motif_prop}
 \end{center}
 \end{figure}
 
 In these cells, the ENCODE Consortium released ChIP-seq data for 53 different TFs. Additionally, nucleosome occupancy (MNase-seq) and chromatin accessibility (DNaseI-seq) data were generated with a high depth of coverage. Furthermore, the ENCODE Consortium also released peaks called using their uniform processing pipeline \cite{gerstein_architecture_2012}. These peaks are interesting because i) they are called from technical replicate ChIP-seq samples and ii) several peak callers are used and the different results are integrated. These peaks are thus reproducible [REFERENCE IDR] and robust to peak caller discrepancies and can be considered an excellent standard.
 
 The number of peaks called for each TF was highly variable and likely reflects each factor activity in this cell line (Figure \ref{encode_peaks_gm12878_peak_number}). The most abundant factor in terms of peaks was RUNX3 followed by CTCF. This observation fits BioGPS \citep{wu_biogps:_2016} data, which indicate that both RUNX3 and CTCF have a higher expression in lymphoblast and in B cells compared to other tissues. Regarding CTCF, it is involved in chromatin looping \citep{ghirlando_ctcf:_2016}. Because it implies that two CTCF molecules form a homodimer due to the genome 3D conformation, it potentially multiplies by 2 the number of CTCF peaks. Moreover, the propensity of each TF to bind through their motifs was also variable, with CTCF again showing the highest values (Figure \ref{encode_peaks_gm12878_motif_prop}).
 
 \section{ChIPPartitioning : an algorithm to identify chromatin architectures}
 \label{encode_peaks_chippartitioning}
 
 Discovering archetypical chromatin architectures over a set of regions of interest - let's say containing a TF binding site in their middle - is a long standing problem in bioinformatics. More formally, given a matrix $R$ of dimensions $NxL$ containing $N$ vectors of read counts $r_{1}, r_{2}, ..., r_{N}$ of length $L$, each containing the number of reads mapping at a given position in a given region, find $K \leq N$ vectors of length $L' \leq L$ that contain archetypical signals found in the $N$ regions of $R$. This can actually be solved using clustering methods which group regions that look alike into $K$ groups. The summary  of the signal inside each group - for instance the mean signal for the K-means algorithm - can then be interpreted as the archetypical chromatin architectures. Biologically, different organization may reflect different functions.
 
 First, the $N$ regions of interest are usually aligned with respect to a feature of interest, for instance a TF binding site. However, the chromatin features of interest - for instance the nucleosomes - may not be aligned from one region to the next. This can originate because i) of the true binding sites being fuzzily distributed around the center of the regions, ii) the chromatin features appear at a varying distance from the region centers or iii) both. Comparing two regions then necessitates to first realign the chromatin features. Second, the regions can show a functional orientation. For instance, TF binding sites have an upstream and a downstream with respect to the bound sequence. Orienting properly the regions is also required to properly compare the chromatin organizations in two regions. Finally, the signal over some regions may be sparse because of a sub-optimal sequencing depth.
 
 The study of signal distribution over genomic regions has been a quite active field for bulk sequencing experiments during the last decade. Dedicated algorithms \citep{hon_chromasig:_2008,nielsen_catchprofiles:_2012,kundaje_ubiquitous_2012,nair_probabilistic_2014,groux_spar-k:_2019} have been developed to cluster genomic regions based on their distribution of reads.
 
 Most of these algorithms and software tools deal with some of the issues cited above. However, the algorithm developed by \citep{nair_probabilistic_2014} - which I will call ChIPPartitioning - is probably the best. ChIPPartitioning is a probabilistic partitioning method that softly clusters a set of genomic regions based on their signal shape (as opposed to the absolute values) resemblance. To ensure proper comparisons between the regions, the algorithm allows to offset one region compared to the other to retrieve a similar signal at different offsets and to flip the signal orientation. Finally, it has been demonstrated to be really robust to sparse data. 
 
 This algorithm models the signal over a region of length $L$ as having been sampled from a mixture of $K$ signal models, using $L$ independent Poisson distributions. The number of reads sequenced over this region is then the result of this sampling process. The entire set of regions is assumed to have been generated from a mixture of $K$ different signal models (classes). Each class is represented by a vector of $L' \leq L$ values that represent the expected number of reads at each position for that class. These values are thus the Poisson distribution parameters. 
 
 In order to discover the $K$ different chromatin signatures in the data, the algorithm proceeds to a maximum likelihood estimation of the Poisson distribution parameters using an expectation-maximization (EM) framework. Given a set of $K$ models, the likelihood of each region given each class is computed. A posterior probability of each class given each region can, in turn, be computed. These probabilities can be interpreted as a soft clustering. The parameters of the classes are updated using a weighted aggregation of the signal. Since a probability to belong to each class is computed for each region, each region participates in the update of all the classes, with different weights.
 
 If the length of the chromatin signature searched $L'<L$, then the algorithm slides a window along the regions and searches for this signature at each possible offset. This is how it deals with the alignment issue. The signal orientation issue is tackled by also performing a search with the flipped model. The procedure is depicted in Figure \ref{atac_seq_em}A.
 
-All of the above described algorithms compute a set of posterior probabilities and use them to perform the class model update. As illustrated in Figure \ref{atac_seq_em}, this procedure is actually a weighted and ungaped data alignment in which the posterior probabilities are the weights.
+All of the above described algorithms compute a set of posterior probabilities and use them to perform the class model update. This procedure is actually a weighted and ungaped data alignment in which the posterior probabilities are the weights.
 
 \subsection{Data realignment}
 \label{encode_peaks_data_realign}
 
 ChIPPartitioning computes a set of posterior probabilities and uses them to perform the class model updates. As illustrated in Figure \ref{atac_seq_em}, this procedure is actually a weighted and ungapped data alignment in which the posterior probabilities are the weights.
 
 It is absolutely feasible to run a partitioning on a given matrix $A$, for instance MNase-seq read counts, using ChIPPartitioning, and to subsequently use the obtained posterior probabilities to compute the class models, using another data matrix, let us say $B$ of DNase-seq reads.
 
 This procedure allows to realign dataset $B$ as $A$ in order to co-visualize different types of signals. The only things that should be taken care of is that matrices $A$ and $B$ should have the same dimensions.
 
 In the following sections, this is the procedure that will be used to overlay different types of data for a given partition.
 
 
 \section{Nucleosome organization around transcription factor binding sites}
 
 % examples of partitions
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.25]{images/ch_encode_peaks/MNase_profiles.png}  
 	\captionof{figure}{\textbf{Chromatin pattern around TF binding sites in GM12878 :} \textbf{A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display  nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf{B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represent the proportion of the highest signal for each chromatin pattern.}
 \label{encode_peaks_array_measure}
 \end{center}
 \end{figure}
 
 For each dataset, the peak coordinates were reassigned to the closest TF motif, if any. However dealing with unaligned signal was still necessary. Indeed, it could not be excluded that the different TFs would not be the anchor of the chromatin organization around them and have nucleosome arrays at variable distances from their binding sites. Furthermore, dealing with the region orientation was also needed because i) all peaks did not contain a motif indicating the directionality of the binding site (Figure \ref{encode_peaks_gm12878_motif_prop}) and ii) as before, the TF binding site may not be the main driving force of the neighboring chromatin organization. However, this pre-processing step, even if it could not resolve entirely this issue, could at least soften it.
 
 To uncover the different nucleosome architectures around TF binding site, one partition per peaklist based on the MNase-seq signal and using ChIPPartitioning was performed. Because the time required to run the partitioning procedure is long and is a linear function of the number of classes, the choice of four classes was a compromise allowing to discover several chromatin architectures while not being computationally too intense. ChIPPartitioning was also given a freedom of shifting of 15 bins (corresponding to -70bp, -60bp, ..., 0bp, ..., +60bp, +70bp) and of flipping. A visual inspection of the results revealed that all classes, for all TFs, show a nucleosome array on at least one of the sides of the TF binding site (examples are displayed in Figures \ref{suppl_encode_peaks_em_ctcf}, \ref{suppl_encode_peaks_em_nrf1}, \ref{suppl_encode_peaks_em_cfos} and \ref{suppl_encode_peaks_em_max}). Additionally, it was also possible to see an increased chromatin accessibility and sequence conservation at the level of the binding site. The enhanced chromatin accessibility is compatible with the current view of TFs binding nucleosome depleted regions [REFERENCE]. However, the absence of a footprint like signal is explained by the shifting. By shifting and flipping the regions, ChIPPartitioning realigns the signal over these regions, at the cost of unphasing the binding sites.
 
 A noticeable exception to this rule was the Early B-cell factor 1 (EBF1) that seemed to have nucleosome arrays spanning its binding sites (Figure \ref{encode_peaks_array_measure}B).
 
 In order to explore more carefully to what extent nucleosome arrays may be organized with respect to each TF binding sites, I used the mean array density measure developed by \citep{zhang_canonical_2014}. A class pattern showing well positioned nucleosomes is typically showing sharp regions of strong signal separated by signal depleted regions reflecting the alternation of nucleosome presence/absence. The method developed by Zhang and colleagues basically searches for strong variations of signal. The higher the score, the more the pattern contains well positioned nucleosomes. On the other hand, the ability of a TF to act as an anchor for arrays organization was measured as the standard deviation of the shift used by ChIPPartitioning. Briefly, it is possible to compute the probability density of the usage of each shift state. Assessing how much the different shift states were used is indicative of how much the individual patterns were aligned at the beginning. A low standard deviation value indicates that the shifting tends to be the same for all binding sites and thus that the nucleosome arrays occur at a fixed - unspecified - distance from the binding site. In this case, the binding site could be the array anchor. 
 
 Both values were measured for all classes discovered, for all TFs. The results are displayed in Figure \ref{encode_peaks_array_measure}. First, it was possible to identify a sub-population of classes in which the TF binding site seemed to act as an anchor for the nucleosomes. This represented binding sites for CTCF, RAD21, SMC3, YY1 and ZNF143 (see Figure \ref{encode_peaks_array_measure}A, points 6,8,10,13,14,15,18 and 19). A closer inspection of these class patterns showed a strong DNaseI footprint and a peak of sequence conservation. A DNaseI footprint is a typical pattern - composed of a signal depletion in between two signal enriched regions - revealing a region protected against the action of DNaseI  by the binding of a factor. The presence of a clear footprint indicates that the underlying binding sites were aligned, supporting the fact that the binding sites are anchors for the nucleosome organization. This was further supported by the sharp peak of sequence conservation, most likely reflecting the TF motif. Nonetheless, all other classes showed a wide and fuzzy chromatin accessibility pattern, as illustrated by ATF3 in Figure \ref{encode_peaks_array_measure}B, indicating misaligned binding sites.
 
 Breast cancer type 1 susceptibility protein (BRCA1) was also identified using this method. The identified class (class 3, see Figure \ref{suppl_encode_peaks_em_brca1}) indeed showed well positioned nucleosomes. However, I decided not to consider this hit for two reasons : i) there was no footprint in the nucleosome depleted region indicating that the sites are not aligned and ii) the ENCODE consortium labeled this peak list as problematic (low reproducibility read coverage).
 
 Finally, it should be noted that noisy MNase-seq patterns were attributed  high nucleosome array density scores. Because the nucleosome signal is noisy, it varies a lot and got a good score. Such classes are found in the cloud of points just above the horizontal line on the right of the plot (mostly RNAPIII peak classes). Second, some CTCF binding sites displayed strongly positioned nucleosome, confirming previous reports \citep{kundaje_ubiquitous_2012, fu_insulator_2008}.
 
 Thus even if all classes showed at least one nucleosome array, it seems that most of the TFs are not the force driving the array organization, with the notable exceptions of CTCF, RAD21, SMC3, YY1 and ZNF143.
 
 \section{The case of CTCF, RAD21, SMC3, YY1 and ZNF143}
 \label{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.25]{images/ch_encode_peaks/colocalization_ctcf.png}
 	\captionof{figure} {\textbf{ Colocalization with CTCF peaks in GM12878 cells : } \textbf{A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). \textbf{B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. \textbf{C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. Peaklists are named using the TF together with the laboratory which produced the data.}
 \label{encode_peaks_colocalization_ctcf}
 \end{center}
 \end{figure}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.4]{images/ch_encode_peaks/CTCF_ndr_length_rad212.png}
 	\captionof{figure} {\textbf{Nucleosome free region at CTCF binding sites} \textbf{A} The lengths are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SMC3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf{B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.}
 \label{encode_peaks_ctcf_ndr}
 \end{center}
 \end{figure}
 
 
 Two possible alternative hypotheses can explain the presence of these strong nucleosome arrays around these TFs binding sites. First, each TF has the ability to drive the formation of well spaced nucleosome arrays in their vicinity. Second, all the classes detected contain the same set of genomic regions.
 
 Two observations strongly support the second hypothesis. First CTCF is known to interact with the cohesin complex \citep{stedman_cohesins_2008} - composed of SMC1, SMC3, RAD21 and either STAG1 or STAG2 \citep{losada_cohesin_2014} -, with YY1  \citep{donohoe_identification_2007} and with ZNF143 \citep{bailey_znf143_2015}. Second, the YY1 and ZNF143 showed \url{~50}\% and \url{~10}\% of direct binding respectively (Figure \ref{encode_peaks_gm12878_motif_prop}), leaving the possibility of an indirect binding mechanism, for instance through CTCF.
 
 To further confirm this hypothesis, I measured the extent to which CTCF and these other TF peaks co-localized. To do so, each RAD21, SMC3, YY1 and ZNF143 peak was checked for the presence of a CTCF peak. The results, shown in Figure \ref{encode_peaks_colocalization_ctcf}A, support the four already known interactions between CTCF and the cohesin complex members RAD21 and SMC3, between CTCF and YY1 and, to a lesser extent, between CTCF and ZNF143. Additionally, for YY1 and ZNF143, the presence of CTCF and of a canonical motif occur in separate peak subsets, as shown in Figure \ref{encode_peaks_colocalization_ctcf}B, suggesting two different binding strategies : i) through a direct recognition of the motif or ii) through another mechanism leading to a co-localization with CTCF - most likely through binding to CTCF. 
 
 Peaks are represented by the maximum read density position, as defined by ENCODE. Thus, the effective binding site of these TF can be anywhere in the peak. As a matter of fact, ZNF143 and YY1 may bind close but without direct interaction with CTCF. If SMC3, RAD21, YY1 or ZNF143 physically interact with CTCF and bind as a complex, one prediction would be that an extended nucleosome depleted region (NDR) should be observed to allow these complexes to bind.
 
 In order to verify this hypothesis, I set up a classification method that assigns either a "nucleosome" or a "free" label to each position, in a given region, based on the MNase-seq signal. Assuming that the center of the CTCF peaks is in a NDR, these positions were labeled as 'free' and from there, the neighboring positions on the left and on the right were classified until finding the first position labeled 'nucleosome' (see Figure \ref{suppl_encode_peaks_ctcf_ndr}). The size spanned by the regions labeled as 'free' were then measured for each CTCF binding site. The NDR lengths were finally grouped according to the presence of RAD21, SMC3, YY1 or ZNF143 (Figure \ref{encode_peaks_ctcf_ndr}).
 
 First, it seems that CTCF binding sites are distributed in two functional groups of regions based on the presence of other interactors : i) promoter distant regions with both RAD21 and SMC3 (the cohesin complex), ii) promoters together with YY1 and/or ZNF143. This segregation likely reflects different functions of CTCF : i) looping related functions with the cohesin complex and ii) a regulator of transcription with other partners. The fact that promoter enriched groups show an increased NDR, can be explained by an enhanced chromatin opening to accommodate for the presence of other TFs and of the RNAPII.
 
 Interestingly the subgroups containing the cohesin complex (in orange in Figure \ref{encode_peaks_ctcf_ndr}A) show a NDR length that is function of the number of TFs present (cohesin < cohesin + YY1/ZNF143 < cohesin + YY1 + ZNF143). Because these sites are away from promoters, it is really likely that the increased NDR size is only caused by the binding of a larger CTCF complex. Furthermore, their reduced NDR size measured is compatible with the classes of binding sites showing strong nucleosome arrays.
 
 Finally, in order to reveal the nucleosome organization around each subset of peaks, I performed a ChIPPartitioning classification method using two classes, with one of them set to represent a flat signal (and to act as a "waste" class). The aim was to make a clear difference between "typical" and "non-typical" nucleosome organizations. For RAD21, SMC3, YY1 and ZNF143 the results showed that strong nucleosome arrays on both sides and a clear DNaseI footprint are only present when a CTCF is also present, as illustrated for YY1 in Figure \ref{encode_peaks_colocalization_ctcf}C.
 
 Together, these results support the hypothesis that CTCF forms a complex with YY1 and/or ZNF143, in addition to the cohesin complex. They also support the fact that only CTCF has the property of positioning nucleosomes into regular arrays in its vicinity and that any other TF showing such a behaviour is likely binding with or near CTCF. As important, the apparent segregation in terms of regions bound by the different CTCF complexes is consistent with the hypothesis that the different functions of CTCF depend on its interactors \citep{ong_ctcf:_2014, ghirlando_ctcf:_2016}.
 
 \section{CTCF and JunD interactomes}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.4]{images/ch_encode_peaks/TF_associations.png}  
 	\captionof{figure}{\textbf{Possible interaction scenarios between TFs} \textbf{A} Direct co-binding. The TFs dimerize and bind together on DNA. \textbf{B} Indirect co-binding. Both TF dimerize but only one binds the DNA, the other (the blue) is the tethering factor. \textbf{C} Independent co-binding. Both TF bind in close vicinity but without forming a complex. Both TFs may not be necessarily bound at the same time. \textbf{D} Interference. Both motifs partially or totally overlap each other. Whether only one TF or both can bind at the same time is unknown.}
 \label{encode_peaks_tf_association}
 \end{center}
 \end{figure}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.4]{images/ch_encode_peaks/ctcf_motif_association.png}  
 	\captionof{figure}{\textbf{CTCF motif association} measured around the binding sites of different TFs. For each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf{A} Odds ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequent than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf{B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf{C} Same as in (B) but for TF binding sites that do not have their own motif. The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref{encode_peaks_methods_data}).}
 \label{encode_peaks_ctcf_association}
 \end{center}
 \end{figure}
 
 \begin{table}
 \begin{center}
 	\begin{tabular}{ |c|c|c|l|l|c|c| }
   	\hline
   	\multicolumn{7}{|c|}{Curated associations} \\
   	\hline
   	TF$_{A}$ & TF$_{B}$ & Motif ass. & Type  & Binder & Reported & Validated \\
   	\hline
   	CTCF & ATF2   & pos & indep.co-bind &       & no  & no \\
   	CTCF & EBF1   & pos & indep.co-bind &       & yes & no \\
   	CTCF & MAZ    & pos & indep.co-bind &       & yes & no \\
   	CTCF & NFYb   & pos & indep.co-bind &       & yes & no \\
   	CTCF & NFkB   & pos & indep.co-bind &       & yes & no \\
   	CTCF & PAX5   & pos & indep.co-bind &       & yes & no \\
   	CTCF & SP1    & pos & indep.co-bind &       & yes & no \\
   	CTCF & BATF   & neg & indir.co-bind & BATF  & yes & no \\
   	CTCF & ELF1   & neg & indir.co-bind & ELF1  & yes & no \\
   	CTCF & IRF4   & neg & indir.co-bind & CTCF  & yes & no \\
   	CTCF & MEF2a  & neg & indir.co-bind & both  & yes & no \\
   	CTCF & MEF2c  & neg & indir.co-bind & both  & yes & no \\
   	CTCF & NFATc  & neg & indir.co-bind & CTCF  & no  & no \\
   	CTCF & NFYa   & neg & indir.co-bind & CTCF  & yes & no \\
   	CTCF & NRF1   & neg & indir.co-bind & CTCF  & yes & no \\
   	CTCF & NRSF   & neg & indir.co-bind & CTCF  & yes & no \\
   	CTCF & PAX5   & neg & indir.co-bind & both  & yes & no \\
   	CTCF & POU2f  & neg & indir.co-bind & POU2f & yes & no \\
   	CTCF & RUNX3  & neg & indir.co-bind & both  & no  & no \\
   	CTCF & SRF    & neg & indir.co-bind & CTCF  & yes & no \\  
   	CTCF & USF1   & neg & indir.co-bind & both  & yes & no \\  
   	CTCF & YY1    & neg & indir.co-bind & CTCF  & yes & yes\\  
   	CTCF & ZNF143 & neg & indir.co-bind & CTCF  & yes & no \\
   	\hline
   	JunD & BHLHE40 & neg & indir.co-bind & BHLHE40 & yes & no \\
   	JunD & CTCF    & neg & indir.co-bind & CTCF    & yes & no \\
   	JunD & EBF1    & neg & indir.co-bind & EBF1    & yes & no \\
   	JunD & EGR1    & neg & indir.co-bind & EGR1    & yes & yes\\
   	JunD & ELK1    & neg & unknown       &         & no  & no \\
   	JunD & IRF4    & neg & indir.co-bind & JunD    & yes & yes\\
   	JunD & MAZ     & neg & indir.co-bind & MAZ     & no  & no \\
   	JunD & PAX5    & neg & indir.co-bind & PAX5    & yes & no \\
   	JunD & SP1     & neg & indir.co-bind & SP1     & yes & yes\\
   	JunD & USF2    & neg & indir.co-bind & USF2    & yes & no \\
   	JunD & YY1     & neg & indir.co-bind &         & yes & yes\\
   	JunD & ZBTB33  & neg & unknown       &         & yes & no \\    
   	\hline
 	\end{tabular}
         
 	\captionof{table} { \textbf{Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TFs involved in the association, 'Motif.ass.' to whether both motifs are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding (the value 'both' means that both tethering complexes may exist), 'Reported' to whether this interaction has already been reported in one of the following studies \cite{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep{chatr-aryamontri_biogrid_2017}.}
 \label{encode_peaks_association_table}
 \end{center}
 \end{table}
 
 The study of co-binding with CTCF showed that it was possible to detect global associations. I already detected that the cohesin complex members SMC3 and RAD21 form a complex with CTCF, as expected from literature [REFERENCE NEEDED]. Additionally, I detected that YY1 and ZNF143  are also frequently associated with CTCF.
 
 Thus, I decided to push forward in this direction. To this end, I set up a method based on motif co-occurrence to i) relieve the necessity of observing similar chromatin architectures, as in the previous section and ii) be able to functionally characterize the detected interactions.  
 
 Several types of functional associations can occur between a TF$_{A}$ and a TF$_{B}$. Because each one of them brings different expected patterns in the data, it should be possible to detect and disentangle them. First, two TFs can dimerize and bind to DNA using both DNA binding domains (DBDs) [REFERENCE NEEDED] (Figure \ref{encode_peaks_tf_association}A). I will refer to this case as \textbf{direct co-binding}. If this happens, both TF motifs are expected to appear in close vicinity, more often than by chance. Moreover, a spatial constraint (both spacing and orientation) reflecting the complex structure is also expected to occur. Second, two TFs can dimerize and bind to DNA using only one of the DBDs. This will result in having one of the TFs bound to DNA while the other one will tether DNA through its interaction with the other TF (Figure \ref{encode_peaks_tf_association}B). This case will be referred to as \textbf{indirect co-binding}. In such a case, if TF$_{A}$ is the factor binding its motif and TF$_{B}$ is the tethering factor, both motifs are expected to repel (avoid) each other at TF$_{A}$ binding sites. Third, two TFs can both bind DNA using their own DBDs, in close vicinity but without any physical interaction (Figure \ref{encode_peaks_tf_association}C). In such a case, both motif$_{A}$ and motif$_{B}$ are expected to be enriched at both TF$_{A}$ and TF$_{B}$ binding sites. However, no spatial constraint is expected between the motifs. This case will be referred to as \textbf{independent co-binding}. This can be caused by a temporal relationship between both TFs where both TFs can bind to a given region asynchronously. For instance, a first TF is recruited to its binding site and ensures - somehow - a proper chromatin environment for another TF, as illustrated during macrophage and B cell progenitor commitment \citep{heinz_simple_2010}.
Finally, in case of a partial or total motif overlap, both TFs may be observed to be bound together (Figure \ref{encode_peaks_tf_association}D). In such a case, different phenomena may explain this observation. A first possible explanation would be that two TFs compete to bind to the same region. Observing both TFs bound together could be due to an overlap of data from different cells in which only one TF is bound at a time. A second possible explanation would be that, for some reason, only one TF is bound, never the other. However, I prefer to be cautious regarding the causal mechanisms and this case will be referred to as an \textbf{interference}.
 
 In order to collect more evidence about functional connections between TFs, I developed a simple analysis pipeline able to detect the expected patterns of motifs described above. Briefly, given a set of binding sites for a TF$_{A}$, it is possible to construct a contingency matrix containing the number of binding sites with i) motif$_{A}$ and motif$_{B}$, ii) motif$_{A}$ only, iii) motif$_{B}$ only or iv) no motif and assess whether both motifs are associated or avoid each other using a Fisher's exact test. Then, for pairs of motifs showing an association, displaying the spatial distribution of the motifs may help to discriminate whether or not there is a spacing constraint or a motif overlap.
 
 I investigated the association of 47 TFs for which 53 datasets were available in GM12878 cells with CTCF or JunD. CTCF was chosen because i) most of its binding sites have a short nucleosome depleted region and show only a peak of sequence conservation at the binding site leaving a restricted space for other motifs to co-occur (Figure \ref{suppl_encode_peaks_em_ctcf}) and ii) I already collected several observations regarding CTCF. JunD was chosen as a complementary example to CTCF in the sense that i) contrary to CTCF, it is only a transcriptional regulator, ii) it is expected to bind mostly to regulatory regions, thus to open chromatin regions where other motifs are expected to co-occur, iii) $\sim$50\% of the peaks have a motif versus $\sim$80\% to $\sim$90\% for CTCF peaklists (Figure \ref{encode_peaks_gm12878_motif_prop}).
 
 % motif co occurence
 Motif co-occurrence analysis suggested several interactions. Regarding CTCF motif (Figure \ref{encode_peaks_ctcf_association}A), 8 positive motif associations (ATF2, EBF1, MAZ, NFYb, NFkB, PAX5, SP1, YY1) and 16 negative motif associations (BATF, ELF1, IRF4, MEF2a, MEF2c, NFATc, NFYa, NRF1, NRSF/REST, PAX5, POU2F2/OCT2, RUNX3, SRF, USF1, YY1 and ZNF143) with other motifs were found. Regarding JunD (Figure \ref{suppl_encode_peaks_jund_association}A), positive motif associations with 2 other TF motifs (BATF, cFos) and 13 negative associations with other TF motifs (ATF2, BHLHE40, CTCF, EBF1, EGR1, ELK1, IRF4, MAZ, PAX5, SP1, USF2, YY1 and ZBTB33) were found. cFos and one of the YY1-Sydh peaklists displayed evidence of poor quality (not shown and annotated as such by the ENCODE Consortium). Additionally, ATF2 is an AP1 member whose motif possesses a 2bp spacer (TGANNTCA) while the JunD motif has a 1bp spacer (TGANTCA). Thus the strong negative interaction may simply be due to the fact that both motifs are mutually exclusive. In consequence, the positive associations CTCF-YY1 and JunD-cFos and the negative association JunD-ATF2 should be ignored. Additionally, JunD and BATF motifs are the same as both these TFs belong to the AP1 family. In consequence, it is impossible to say whether BATF peaks harbour a JunD or a BATF site. Thus this association should be ignored as well, leaving no positive association with the JunD motif.
 
 % densities
 The analysis of CTCF and JunD motif occurrence densities (Figures \ref{encode_peaks_ctcf_association}B and C and Figure \ref{suppl_encode_peaks_jund_association}B and C) revealed further interesting details regarding possible association mechanisms. First, positive associations showed CTCF density patterns mostly compatible with the direct co-binding and the independent co-binding scenarios (see Figure \ref{encode_peaks_ctcf_association}B). However, making a clear distinction between both is often impossible. For instance, both EBF1 peaklists show a decrease in CTCF motif density $\sim$10bp after the peak followed by an increase which could represent the spacer between CTCF and EBF1. However this is followed by a rather wide CTCF motif presence, mostly suggesting an independent co-binding scenario. An interesting candidate for a direct co-binding with CTCF is RXRa (Figure \ref{encode_peaks_ctcf_association}B). Even though the motif association was not significant, a focused co-localization of both motifs appears. Second, negative associations showed CTCF and JunD density patterns compatible with the indirect co-binding scenario where the TFs would tether through CTCF or JunD, i.e. the CTCF or JunD motifs do not show a spacing constraint with the binding sites but are rather spread over $\sim$100bp around binding sites without their own motif (Figure \ref{encode_peaks_ctcf_association}C and Figure \ref{suppl_encode_peaks_jund_association}C). Interestingly, CTCF motif around YY1 and ZNF143 binding sites lacking their own motifs (see bottom of Figure \ref{encode_peaks_ctcf_association}C) showed really focused densities, indicating that for some reason, the CTCF motif is well localized. Even if unexpected, this observation is not incompatible with the indirect co-binding scenario and further supports the results from section \ref{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}.
 
 % results
 To summarize, the motif association statistics allowed me to identify 35 associations of TFs with either CTCF or JunD (Table \ref{encode_peaks_association_table}). The strongest negative interactions for CTCF were ZNF143 and YY1, supporting the results found in the previous sections. The analysis of CTCF and JunD motif spatial distributions around peaks and a closer examination of the contingency matrices allowed me to suggest details about the interaction mechanisms, including which TF binds DNA. The only two exceptions were JunD-ELK1 and JunD-ZBTB33 for which the motif occurrence densities were uninformative. Finally, out of these 35 associations, 5 were supported by experimental evidence and 5 were not already reported in previous studies or databases \citep{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012, chatr-aryamontri_biogrid_2017}.
 
 \section{EBF1 binds nucleosomes}
 
 \begin{figure}
 \begin{center}
 	\includegraphics[scale=0.4]{images/ch_encode_peaks/ebf1_haib_1.png}  
 	\captionof{figure}{\textbf{EBF1 binding sites} stand on the edge of a nucleosome. \textbf{A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and ii) MNase-seq data released by Gaffney et al. (in blue) \citep{gaffney_controls_2012}. \textbf{B} Dinucleotide frequencies around the dyads of the nucleosomes that have an EBF1 binding site within 100bp. \textbf{C} Motif frequency around the dyads of nucleosomes that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.}
 \label{encode_peaks_ebf1}
 \end{center}
 \end{figure}
 
 % As presented above (section \ref{encode_peaks_chippartitioning}), EBF1 binding sites does not seem to present a NDR seem to be covered by a nucleosome array. This observation suggest that EBF1 can bind to nucleosomal DNA. However, because ChIPPartitioning realigns the data, one possible explanation is that it failed to properly aligned the data and that the results do not reflect reality.
 
 % In order to clarify this, I looked at the MNase digestion profile - more specifically, at the distribution of nucleosome dyads - at EBF1 binding sites.
 
 EBF1 is a crucial factor for B cell development. It is necessary in the early steps, for a proper lineage commitment as well as later on during the entire B cell development \citep{boller_defining_2018}. For many years, EBF1 has been thought to be able to "pioneer early changes in the target gene chromatin necessary for transcriptional activation" and proper B cell development \citep{hagman_early_2005}. Experimental evidence supported that EBF1 could be able to bind compacted naive chromatin (without noticeable mark/modification), leading to a local chromatin opening, H3K4me2 deposition, DNA demethylation and gene activation \citep{maier_early_2004,boller_pioneering_2016}. If such features make a lot of sense during lineage commitment, some of the underlying mechanisms remained mysterious, especially how EBF1 primarily binds to closed chromatin. With regard to this, the results of section \ref{encode_peaks_chippartitioning}, suggesting that EBF1 binding sites may be covered by nucleosome arrays, caught my attention. In order to collect evidence that may shed light on this, I conducted a deeper exploration of the EBF1 binding sites.
 
 First, the distribution of nucleosome dyads - from two independent experiments - around EBF1 binding sites revealed a landscape that is compatible with a nucleosome positioned $\sim$70bp apart from the binding sites (Figure \ref{encode_peaks_ebf1}A). This configuration would position the EBF1 binding site at the edge of the nucleosome. The visible 10bp periodicity suggested that other positionings of the EBF1 binding site exist but always at integer numbers of helix turns, such that the EBF1 binding site would always be positioned the same way relative to the nucleosome surface. Surprisingly, the distribution of EBF1 motif remained the same, whether the nucleosome was containing an EBF1 bound site or not (Figure \ref{suppl_encode_peaks_ebf1_nucl}).
 
 Second, to support the fact that these EBF1 binding sites are indeed functional sites, I compared some of their chromatin features with the entire nucleosome pool. As expected, the presence of EBF1 binding sites was correlated with an increased accessibility (Figure \ref{suppl_encode_peaks_ebf1_chrom}A), even though the opening was spread rather than narrow. Furthermore, this increased opening was concomitant with an enriched H3K4me2 deposition (Figure \ref{suppl_encode_peaks_ebf1_chrom}B), in line with the literature. Last, it was also possible to highlight a higher sequence conservation at the nucleosome edges when they had an EBF1 binding site (Figure \ref{suppl_encode_peaks_ebf1_chrom}C), suggesting a functional difference between both nucleosome pools.
 
 % Finally, Trifonov's motif appeared along the nucleosome, EBF1 motif was  rather present at the nucleosome edges. A closer look both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif ({A/C}CCC{A/C} and {A/G}GGG{A/G}) at the cost of 2 or 0 missmatches.
 
 % A further inspection of the dinucleotide base composition in the nucleosome bearing an EBF1 binding site revealed a periodic pattern that is compatible with a rotationally positioned nucleosome (Figure \ref{encode_peaks_ebf1}B), as expected from literature in \citep{ioshikhes_variety_2011,gaffney_controls_2012}.
 
 % Finally, the occurrence of the nucleosome positioning motif - YRRRRRYYYYYR where Y is C/T and R is A/G - identified by Trifonov \citep{trifonov_cracking_2011} in these nucleosomes is antiphased with the occurrence of the EBF1 motif. If Trifonov's motif appeared along the nucleosome, EBF1 motif was  rather present at the nucleosome edges. A closer look both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif ({A/C}CCC{A/C} and {A/G}GGG{A/G}) at the cost of 2 or 0 missmatches.
 
 % These results suggest that EBF1 can bind nucleosomal DNA. In most cases, it seems that the EBF1 binding site is located at its edge. Incidentally, the high similarity between Trifonov and EBF1 motifs suggest that EBF1 binding sequence may have a nucleosome positioning property. Interestingly, EBF1 motif, as identified by JASPAR \ref{suppl_encode_peaks_ebf1_logo}, is 14bp wide. Consequently, it is conceivable that, wherever this motif is located along the nucleosome, at least part of remains facing outward and is thus "readable".
 
 % Based on this observation, I hypothesize that EBF1 may be a pioneering factor or that it influence nucleosomes positioning through its binding. In the first case, EBF1 would be able to target yet inaccessible loci upon the right cellular conditions. In the second case, EBF1 would rather serve to both open and close targeted sites by leading - directly or indirectly - to the positing of a nucleosome right beside of it binding site. Both scenarios make sense. Indeed, EBF1 is known to be crucial for B-cells commitment. In such developmental processes, specific enhancers are made accessible and active at different, in a coordinated manner, during the developmental process. (AND WHAT ABOUT CLOSING???)
 
 Third, a further inspection of the sequence composition of the nucleosomes bearing an EBF1 binding site revealed i) a periodic occurrence of antiphased WW (W=A/T) and SS (S=C/G) dinucleotides and ii) a periodic occurrence of the YRRRRRYYYYYR (R=A/G, Y=C/T) nucleosome positioning motif described by Trifonov \citep{trifonov_cracking_2011}. Together, these observations suggest that EBF1 binding sites are located on the edge of a rotationally positioned nucleosome \citep{ioshikhes_variety_2011,trifonov_cracking_2011,gaffney_controls_2012}. Interestingly, Trifonov's motif appeared in counter-phase with the EBF1 motif. A closer look at both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif (\{A/C\}CCC\{A/C\} or \{A/G\}GGG\{A/G\}) at the cost of 2 or 0 mismatches.
 
 These results suggest that EBF1 can indeed bind nucleosomal DNA. The bound motifs were predominantly located at the edges of the nucleosomes. Yet, this was also the case for nucleosomes that are not bound by EBF1. This suggests that nucleosomes are already in this position before EBF1 binding, which may be the case given the presence of favorable nucleosome positioning sequences.
 
 The reason why the EBF1 motif is already on the edges of nucleosomes, even without EBF1 binding, remains unknown. One explanation could be that such sites have a double function. The first function would be to recruit EBF1 to open up the region. The second would be that the EBF1 binding sequence (together with other positioning sequences) can act as a barrier - a potential well - preventing the nucleosome from rolling over in this direction. Such a system would have the advantage of promoting a suited chromatin structure in developmentally important regions. Constraining nucleosome movement could serve to hide regulatory elements. At the same time, these regions would remain responsive to differentiation signals through the exposure of EBF1 sites on the periphery of nucleosomes.
  
 
 \section{Methods}
 
 \subsection{Data and data processing}
 \label{encode_peaks_methods_data}
 
 All the GM12878 ENCODE data used were mapped against hg19 genome and can be found on the MGA repository \citep{dreos_mga_2018}.
 
 Peaks called by the ENCODE Consortium using their uniform processing pipeline \cite{gerstein_architecture_2012} were used. These peaks can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Uniform-TFBS/Uniform-TFBS.html}. Assuming that a TF binds to DNA through motif recognition, the peak center should be localized on the motif center. Thus the center of each peak was moved to the closest motif instance within 60bp. To do so, each TF was associated to a log-odds PWM contained either in JASPAR Core vertebrate 2014 \cite{mathelier_jaspar_2014}, HOCOMOCO v10 \cite{kulakovskiy_hocomoco:_2016} or Jolma \cite{jolma_dna-binding_2013} collection. Using the corresponding log-odds PWM, peak sequences were scanned to find motif instances with a score corresponding to a p-value lower than or equal to 1e-4. If such a motif instance was found, the peak position was shifted to the center of the motif instance and mapped to the corresponding strand. Otherwise, the peak position remained unchanged without strand information.
 
 In GM12878 cells, nucleosome occupancy was assessed using MNase-seq data released by the ENCODE Consortium (GSE35586). These data can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/GSE35586/GSE35586.html}. To increase sequencing depth, all replicates available for this cell line were pooled together, resulting in $\sim$789 mio reads, and used as a single dataset. The resulting dataset is available and has the description "GM12878|Nucleosome|all (SLOW!)". Because each read was represented as a single point coordinate corresponding to its 5' edge, these coordinates were centered by 70bp in order to indicate the nucleosome dyads. Finally, another dataset was used for one analysis only. These data were released by Gaffney and colleagues \cite{gaffney_controls_2012} and can be found at \url{https://ccg.epfl.ch/mga/hg19/gaffney12/gaffney12.html} and were not centered as the coordinates already represent the center of paired-end sequenced fragments. The dataset is labeled "All Paired-end samples - 147bp fragments".
 
 Chromatin accessibility was assessed using DNaseI-seq data released by the ENCODE Consortium \cite{boyle_high-resolution_2008} (GSE32970). To increase sequencing depth, all replicates available for GM12878 cells were pooled together, resulting in $\sim$144 mio reads, and used as a single dataset. The individual replicates can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Duke-DNaseI-HS/Duke-DNaseI-HS.html}. The reads were represented as a single point coordinate corresponding to their 5' edges but were not centered as this corresponds to the exact DNaseI nick location.
 
 The EPDnew release 003 was used as TSS annotation \cite{dreos_eukaryotic_2017} and genome sequence conservation was assessed using Phastcons \cite{siepel_evolutionarily_2005}. Both datasets can be found at \url{https://ccg.epfl.ch/mga/hg19/epd/epd.html} and \url{https://ccg.epfl.ch/mga/hg19/phastcons/phastcons.html} respectively.
 
 \subsection{Classification of MNase patterns}
 \label{encode_peaks_em_mnase}
 
 For each TF peaklist MNase, DNase, sequence conservation and TSS density around TF binding site were assessed independently by counting the number of read mapped from -999bp to +1000bp around each peak, using 10bp bins. For each TF, 4 matrices having one row per binding site (peak) and 199 columns were created using ChIP-extract program \citep{ambrosini_chip-seq_2016}.
 
 Probabilistic pattern classification was achieved using the ChIPPartitioning (see section \ref{encode_peaks_chippartitioning}). The algorithm was implemented as described in the supplemental materials of \cite{nair_probabilistic_2014}. 
 
 Two different procedures were used to classify MNase patterns. Both were run for 10 iterations allowing flip and a value of shift of 15 bins.
 
 The first procedure aimed to discover 4 different pattern classes, allowing flip and a shift of 15 bins. The procedure was initialized with 4 classes. The class patterns were initialized by assigning each peak a random probability to belong to each of the 4 classes. The patterns were then computed as the weighted average of the signal given the peak class probabilities as weights. Then the prior class probabilities were initialized as $p_{k,s,f} = 1/(K \times S \times 2)$ where $k$ is the class index, $s$ is the shift value in bins (here 15), $f$ is an indicator variable for the flip state (1 for "normal", 2 for "reverse"), $K$ is the number of classes (here 4) and $S$ is the maximum allowed shift in bins. The classification was run for 10 iterations. At the end, it returned a matrix of dimensions $N \times K \times S \times 2$ containing the probabilities for each of the $N$ regions to belong to each of the $K$ classes, for each possible shift state $S$ and for both flip states ("normal" or "reverse").
 
   The second procedure aimed to discriminate between 2 classes : i) the binding sites describing the "average" binding sites as opposed to ii) those differing from this. To do so, class patterns were initialized to i) the aggregation over all peaks (the average pattern) and ii) a flat pattern being the mean number of counts of the input matrix. Flip and 15 bins of shift were allowed. The prior class probabilities were initialized as $p_{k,s,f} = \mathcal{N}(s, \lfloor S/2 \rfloor + 1, 1)$ where the second and third parameters are the mean and the standard deviation, giving a higher prior probability to states with shift equal to 0bp.
 
 \subsection{Quantifying nucleosome array intensity from classification results}
 Nucleosome array intensity was quantified using a method developed by Zhang and colleagues \citep{zhang_canonical_2014}. Briefly, nucleosome signal is represented in 2 dimensions as a set of signal intensities for a given set of positions. Data are structured as a vector $Y$ containing the nucleosome occupancy signal (for instance an EM classification class profile) for $n$ bins (for EM class profiles, 199 bins of 10bp). First, the 1$^{st}$ order derivative $D_{1}$ of $Y$ is computed. Then the 1$^{st}$ order derivative $D_{2}$ of the absolute value of $D_{1}$ is computed. Local maxima in $D_{2}$ are searched using a window of 15 bins (corresponding to 150bp, a nucleosome width). Maxima can be interpreted as a strong drop or enrichment of signal, corresponding to a pattern expected from a well positioned nucleosome array. Finally, all $D_{2}$ maxima are joined by a line and the nucleosome array intensity at each given position is the height of the line at this position. The nucleosome array intensity for the first and last positions of $Y$ was set to 0. The average nucleosome array intensity of $Y$ was used as the nucleosome array value of the input data.
 
 The classification of a matrix of counts having $N$ rows (regions), with $K$ classes, allowing a maximum of $S$ shift states and two flip states ("normal" and "reverse") outputs a probability matrix $P$ of dimension [$N$, $K$, $S$, 2] containing the probability for each region to belong to each class, given a shift state and a flip state. This matrix can be used to compute a vector $D_{k}$ of length $S$ containing the probability density of the shift states for a class $k$ using :
 
 \begin{equation}
 \begin{aligned}
 	D_{k,s} & = \frac {\sum_{i=1}^{N} (P_{i,k,s,1} + P_{i,k,s',2})} {\sum_{i=1}^{N} \sum_{s=1}^{S} (P_{i,k,s,1} + P_{i,k,s',2})} \\
 	\text{with } \\
 	 s' & = S - s + 1
 \end{aligned}
 \label{encode_peaks_equation_shift_density1}
 \end{equation}
 
 
 \citep{ambrosini_chip-seq_2016}
 where $s'$ represents the index of the reverse orientation and with the constraint that all the elements of $P$ sum to 1. Given the shift probability density vector $D_{k}$ of one class, computing its standard deviation was done using :
 
 \begin{equation}
 \begin{aligned}
 	\sigma_{k} & = \sqrt { \sum_{i=1}^{S} (X_{i}^{2} \cdot D_{k,i}) - \mu_{k}^{2} }\\
 	\text{with } \\
 	\mu_{k} & = \sum_{i=1}^{S} (X_{i} \cdot D_{k,i})
 \end{aligned}
 \label{encode_peaks_equation_shift_density2}
 \end{equation}
 
 where $X$ is a vector containing the position changes in bp for every shift state, e.g. for a maximum number of shift states of 15 ($S=15$) with bins of 10bp, $X$ would contain [-70, -60, ..., 0,  ..., +60, +70].
 
 \subsection{Peak colocalization}
 
 To measure the extent of colocalization between CTCF, YY1, ZNF143, SMC3 and RAD21, the occurrence of YY1, ZNF143, SMC3 and RAD21 peaks around CTCF peaks was computed using ChIP-extract \citep{ambrosini_chip-seq_2016}. The CTCF peak list used as reference was "wgEncodeAwgTfbsSydhGm12878Ctcfsc15914sc20UniPk" because it was the CTCF peak list containing i) the most CTCF peaks and ii) the highest proportion of peaks with a motif. ChIP-extract was run separately for YY1, ZNF143, SMC3 and RAD21 using the following parameters : from -99, to 100, window size 1. Then, the proportion of CTCF peaks having at least one other peak within +/-10 bp, 50bp or 100bp was computed.
 
 \subsection{NDR detection}
 
 Let us consider a matrix of MNase-seq counts $R$ of dimensions $N \times L$ containing $N$ vectors of read counts $r_{1}, r_{2}, ..., r_{N}$ of length $L$. Because MNase-seq reads are a direct indication of the nucleosome occupancy, detecting NDRs is about finding low signal regions, flanked by two high signal regions.
 
 The signal in each vector $r_{i}$ (region) is assumed to have been sampled from a 2 class mixture of high (nucleosome) and low (nucleosome-free) signal, using a Poisson distribution. Both classes are expected to occur with a given probability $p^{nucl}_{i}$ and $p^{free}_{i}$. The rows are considered individually to lessen technical biases such as region specific sequencing depth.
 
 The class probabilities and their mean parameters are estimated using an EM algorithm. First, during the E-step, for each position inside a region, the posterior probability of the nucleosome given the data is computed using :
 
 \begin{equation}
 \begin{aligned}
 	P(nucl | r_{i,l}) = \frac{p_{i}^{nucl} \times Poisson(r_{i,l}, \lambda=m_{i}^{nucl})}
 	                         {p_{i}^{nucl} \times Poisson(r_{i,l}, \lambda=m_{i}^{nucl}) +
 	                          p_{i}^{free} \times Poisson(r_{i,l}, \lambda=m_{i}^{free})}
 \end{aligned}
 \end{equation}
 
 where $r_{i,l}$ is the number of reads at position $l$ in the i-th row of $R$, $m_{i}^{nucl}$ and $m_{i}^{free}$ are the mean parameters of the nucleosome and nucleosome-free classes respectively. Obviously, the nucleosome-free class posterior probability is
 
 \begin{equation}
 \begin{aligned}
 	P(free | r_{i,l}) = 1 - P(nucl | r_{i,l})
 \end{aligned}
 \end{equation}
 
 Then, during the M-step, the class mean parameters are updated using
 
 \begin{equation}
 \begin{aligned}
 	m_{i}^{nucl} = & \frac{\sum_{l=1}^{L} r_{i,l} \times P(nucl | r_{i,l})} {\sum_{l=1}^{L} P(nucl | r_{i,l})} \\
 	m_{i}^{free} = & \frac{\sum_{l=1}^{L} r_{i,l} \times P(free | r_{i,l})} {\sum_{l=1}^{L} P(free | r_{i,l})}
 \end{aligned}
 \end{equation}
 
 and the class probabilities :
 \begin{equation}
 \begin{aligned}
 	p_{i}^{nucl} = & \frac{1} {L} \times \sum_{l=1}^{L} P(nucl | r_{i,l}) \\
 	p_{i}^{free} = & 1 - p_{i}^{nucl}
 \end{aligned}
 \end{equation}
 
 The EM optimization of the parameter estimates was repeated for 10 iterations. At the end of the parameter estimation process, each of the $L$ positions in a region $r_{i}$ was assigned two posterior probabilities $P(nucl | r_{i,l})$ and $P(free | r_{i,l})$ to belong to each class. In all cases, the nucleosome class was the class having the highest mean parameter and the nucleosome free class the class with the smallest ($m_{i}^{nucl} > m_{i}^{free}$).
 
 The binding sites - located in the center of the regions, at position $s = L/2$ - were assumed to be within the NDR. From that point, the NDR was extended using the following procedure :
 
 \SetKwProg{Fn}{}{\{}{}\SetKwFunction{Function}{float NDRextend}%
 \begin{algorithm}[H]
 	\label{encode_peaks_algo_ndr_extend}
 	\Fn{\Function{}}
 	{	\KwData{The posterior probabilities obtained for each position of $r_{i}$.}
 		\KwResult{the left and right coordinates of the NDR}
 		
 		\tcp{NDR only covers the central location}		
 		$left  = s$ \;
 		$right = s$ \;
 		
 		\While{$left \ne 2$ and $right \ne L-1$}
 		{	$p.free.l = P(free|r_{i,left})$ \;
 			$p.free.r = P(free|r_{i,right})$ \;
 			$p.nucl.l = P(nucl|r_{i,left})$ \;
 			$p.nucl.r = P(nucl|r_{i,right})$ \;
 
 			\tcp{bidirectional extension}
 			\If{$p.free.l > p.nucl.l$ and $p.free.r > p.nucl.r$}
 			{	$left \minuseq 1$ \; 
 				$right \pluseq 1$ \;
 			}
 			
 			\tcp{extension to left}
 			\ElseIf{$p.free.l > p.nucl.l$}
 			{	$left \minuseq 1$ \;  }
 			
 			\tcp{extension to right}			
 			\ElseIf{$p.free.r > p.nucl.r$}
 			{	$right \pluseq 1$ \; }
 			
 			\tcp{no more extension possible}
 			\Else
 			{	break \; }
 		}
 		
 		\Return{$left$, $right$}
 	}
 	\caption{Searches the coordinates of the NDR using the posterior nucleosome and nucleosome free class probabilities, for a region $R_i$, from its central position.}
 \end{algorithm}
 
 The nucleosome occupancy around CTCF binding sites was measured using ChIP-extract with "wgEncodeAwgTfbsSydhGm12878Ctcfsc15914sc20UniPk" peak list as reference - because it was the CTCF peak list with the most peaks and with the highest proportion of peaks with a CTCF motif -, the ENCODE MNase-seq data described in section \ref{encode_peaks_methods_data} as targets and the following parameters : from -999bp, to 1000bp and window size 10bp.
 
 This matrix was subjected to a ChIPPartitioning partitioning, as described in section \ref{encode_peaks_em_mnase}, to find 4 nucleosome architectures, using shifting and flipping. The resulting posterior probabilities were used to re-orient the data. If the major shift state - that is the shift state with the highest overall probability - for a given region was the "reverse" state, then the row was reversed. The re-oriented matrix was then subjected to the NDR detection. The re-orientation was done for aesthetic purposes only. Because the NDR detection was performed starting from the center position in each region - and given that reversing a vector did not change its central position - this operation had no influence on the NDR detection.
 
 
 
 \subsection{CTCF and JunD interactors}
 
 % Enumerating motif instances genome-wide
 To enumerate the instances of the CTCF and JunD motifs, the hg19 genome assembly was scanned using CTCF (MA0139.1 from JASPAR Core Vertebrate 2014 \citep{mathelier_jaspar_2014}) and JunD (JUND\_HUMAN.H10MO.A from HOCOMOCOv10 \citep{kulakovskiy_hocomoco:_2016}) matrices to produce lists of potential binding sites. A limit score threshold was set as the score corresponding to a p-value of 1e-5 for each matrix, respectively. This was done using the matrix\_scan program from PWMScan \citep{ambrosini_pwmscan:_2018}. Eventually, any motif instance falling inside a region classified as being a repeated element and blacklisted by the ENCODE Consortium was filtered out using the count\_filter program from the ChIP-seq tools \citep{ambrosini_chip-seq_2016-1}.
 
 % Measuring motif instance occurrence near peaks
 Then, for each TF peak list independently, the number of i) the TF and ii) CTCF/JunD instances +/- 1kb of each peak was measured, in bins of 1bp, using the ChIP-extract program from the ChIP-seq tools \citep{ambrosini_chip-seq_2016-1}. The associations were measured as follows : using the ChIP-extract results for the given peak list versus i) the TF and ii) CTCF/JunD motif instances, the number of peaks having i) at least one TF and one CTCF/JunD motif instance, ii) only TF motif instances, iii) only CTCF/JunD motif instances or iv) no motif instance was counted. These numbers were used to build a contingency table and a two-sided Fisher exact test for association was performed. The motif relationship was considered as a significant association if the test OR was bigger than 1 and the 95\% CI of the OR did not contain 1 or as a significant motif exclusion if the OR was smaller than 1 and the 95\% CI of the OR did not contain 1.
 
 % Motif density around peaks
 The motif occurrence densities were computed from the ChIP-extract result matrices. Out of each matrix, a vector containing the number of motif instances at each possible absolute distance was computed. This was done as follows : first, the neighbours of each non-null cell were incremented (+/- 5 columns on each side) to turn motif instance hits into a non point-like representation. A given cell value could be incremented several times. Second, for each row, the columns corresponding to the same absolute distance from the peak were summed together (e.g. +1bp with -1bp, +2bp with -2bp, +999bp with -999bp). The first column of the resulting matrix contains the number of motif instances present at the peak center (distance of 0bp), the second column at an absolute distance of 1bp and so on. Eventually, the rows were summed up and the resulting vector was considered as the motif density vector for the given peak list. The vectors were used to create a matrix for the CTCF motif and the JunD motif (a vector corresponds to a row), separately, and each matrix was displayed as a heatmap. The row values were standardized and the rows hierarchically clustered using the euclidean distance.
 
 
 \subsection{EBF1 and nucleosome}
 
 The correlation between EBF1 binding sites and nucleosome dyads was made using ChIP-cor \citep{ambrosini_chip-seq_2016-1}, from the web (\url{https://ccg.epfl.ch/chipseq/chip_cor.php}). The references were the corrected EBF1 peaks (wgEncodeAwgTfbsHaibGm12878Ebf1sc137065Pcr1xUniPk dataset, for more details see section \ref{encode_peaks_methods_data}) and the targets either i) the MNase-seq data released by Gaffney et al. \citep{gaffney_controls_2012} (hg19 / DNase FAIRE etc / Gaffney 2012 ... / All Paired-end samples - 147bp fragments) or ii) the ENCODE MNase-seq data (hg19 / ENCODE DNase FAIRE etc / GSE35586 ... / GM12878 Nucleosome all (SLOW!)). In both cases, "any" strand was selected. Because Gaffney data are paired-ended and represent the fragment midpoint (the dyad), no centering was done. The ENCODE data are single-ended and a centering of 70bp (half a nucleosome) was applied to approximate the fragment midpoint. The count cut-off was set to 1 and the range to -399 to +400bp.
 
 To isolate nucleosomes with an EBF1 binding site, the opposite ChIP-cor analysis was run : Gaffney data as references versus EBF1 binding sites as targets with count cut-off set to 1 and the range to -399 to +400bp. In the results page the "Feature Selection Tool" was used to select dyads with at least 1 EBF1 binding site (threshold parameter) located "From" -99bp "To" 100bp. The count cut-off was set to 9999 and both "Switch to depleted feature" and "Reference feature oriented" set to "Off".
 
 These nucleosome dyads were uploaded to OProf (\url{https://ccg.epfl.ch/ssa/oprof.php}) on the SSA server \citep{ambrosini_signal_2003}. Four individual analyses were run to measure the "WW", "SS", "YRRRRRYYYYYR" and EBF1 motif occurrences. In all cases, the 5' and 3' borders were set to -399bp and 400bp, the window shift to 1bp and the search mode to "bidirectional". For "SS" and "WW", the motif to search was entered as a "Consensus sequence", the window size was set to 2bp, the reference position to 1 and the number of allowed mismatches to 0. For "YRRRRRYYYYYR", the motif was also entered as a "Consensus sequence", the window size was set to 12bp, the reference position to 6 and the number of allowed mismatches to 4. For the EBF1 motif, the JASPAR CORE Vertebrate 2018 "EBF1 MA0154.3 (length=14)" was used with a window size of 14bp, a reference position of 7 and a p-value threshold of 1e-4.
 
 To investigate the chromatin architecture around nucleosome dyads, ChIP-cor was used. Two references were used : i) the nucleosomes with an EBF1 binding site (see above) and ii) the entire Gaffney dataset (hg19 / DNase FAIRE etc / Gaffney 2012 ... / All Paired-end samples - 147bp fragments). For each reference, three analyses were run against different target features : i) DNase-seq data to monitor chromatin accessibility (hg19 / ENCODE DNase FAIRE etc / Boyle 2008 ... DNaseI HS - GM12878 - Rep 1) with "any" strand and no centering, ii) H3K4me2 ChIP-seq data (hg19 / ENCODE ChIP-seq / GSE29611 ... / GM12878 H3k4me2) with "any" strand and a centering of 70bp (half the nucleosome) and iii) positional sequence conservation scores (hg19 / Sequence derived / Vertebrate Conservation (phastCons46way) ... / PHASTCONS VERT46) with "any" strand and no centering. For DNase-seq and sequence conservation, the range was set to -399bp to 400bp with a window width of 1bp. For H3K4me2 data, the range was set to -3999bp to 4000bp with a window width of 10bp. For the DNase-seq and the H3K4me2 data, the count cut-offs were set to 1, for the sequence conservation to 9999.
diff --git a/my_thesis.aux b/my_thesis.aux
index 56a96e2..1adac36 100644
--- a/my_thesis.aux
+++ b/my_thesis.aux
@@ -1,152 +1,150 @@
 \relax 
 \providecommand\hyper@newdestlabel[2]{}
 \providecommand\BKM@entry[2]{}
 \catcode `:\active 
 \catcode `;\active 
 \catcode `!\active 
 \catcode `?\active 
 \catcode `"\active 
 \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
 \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
 \global\let\oldcontentsline\contentsline
 \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
 \global\let\oldnewlabel\newlabel
 \gdef\newlabel#1#2{\newlabelxx{#1}#2}
 \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
 \AtEndDocument{\ifx\hyper@anchor\@undefined
 \let\contentsline\oldcontentsline
 \let\newlabel\oldnewlabel
 \fi}
 \fi}
 \global\let\hyper@last\relax 
 \gdef\HyperFirstAtBeginDocument#1{#1}
 \providecommand\HyField@AuxAddToFields[1]{}
 \providecommand\HyField@AuxAddToCoFields[2]{}
 \providecommand \oddpage@label [2]{}
 \babel@aux{english}{}
 \babel@aux{french}{}
 \babel@aux{english}{}
 \@input{head/dedication.aux}
 \@input{head/acknowledgements.aux}
 \BKM@entry{id=1,dest={636861707465722A2E31},srcline={3}}{41636B6E6F776C656467656D656E7473}
 \pgfsyspdfmark {pgfid2}{0}{40463552}
 \pgfsyspdfmark {pgfid1}{6}{40498788}
 \@input{head/preface.aux}
 \BKM@entry{id=2,dest={636861707465722A2E32},srcline={4}}{50726566616365}
 \pgfsyspdfmark {pgfid4}{0}{40463552}
 \pgfsyspdfmark {pgfid3}{6}{40498788}
 \@input{head/abstracts.aux}
 \BKM@entry{id=3,dest={636861707465722A2E33},srcline={9}}{4162737472616374205C28456E676C6973682F4672616E5C3334376169732F446575747363685C29}
 \pgfsyspdfmark {pgfid6}{0}{40463552}
 \pgfsyspdfmark {pgfid5}{6}{40498788}
 \pgfsyspdfmark {pgfid8}{0}{40463552}
 \pgfsyspdfmark {pgfid7}{6}{40498788}
 \pgfsyspdfmark {pgfid10}{0}{40463552}
 \pgfsyspdfmark {pgfid9}{6}{40498788}
 \BKM@entry{id=4,dest={746F632E30},srcline={30}}{436F6E74656E7473}
 \pgfsyspdfmark {pgfid12}{0}{40463552}
 \pgfsyspdfmark {pgfid11}{6}{40498788}
 \@input{main/ch_introduction.aux}
 \BKM@entry{id=5,dest={636861707465722A2E37},srcline={5}}{496E74726F64756374696F6E}
 \pgfsyspdfmark {pgfid14}{0}{40463552}
 \pgfsyspdfmark {pgfid13}{6}{40498788}
 \@input{main/ch_group_projects.aux}
 \BKM@entry{id=6,dest={636861707465722E31},srcline={2}}{5075626C6973686564206C61626F7261746F72792070726F6A65637473}
 \BKM@entry{id=7,dest={636861707465722E31},srcline={5}}{5075626C6973686564206C61626F7261746F72792070726F6A65637473}
 \BKM@entry{id=8,dest={73656374696F6E2E312E31},srcline={12}}{4D6173732047656E6F6D6520416E6E6F746174696F6E207265706F7369746F7279}
 \BKM@entry{id=9,dest={73756273656374696F6E2E312E312E31},srcline={17}}{496E74726F64756374696F6E}
 \BKM@entry{id=10,dest={73756273656374696F6E2E312E312E32},srcline={23}}{4D474120636F6E74656E7420616E64206F7267616E697A6174696F6E}
 \pgfsyspdfmark {pgfid16}{0}{40463552}
 \pgfsyspdfmark {pgfid15}{6}{40511883}
 \BKM@entry{id=11,dest={73756273656374696F6E2E312E312E33},srcline={58}}{436F6E636C7573696F6E73}
 \BKM@entry{id=12,dest={73656374696F6E2E312E32},srcline={64}}{45756B6172796F7469632050726F6D6F746572204461746162617365}
 \BKM@entry{id=13,dest={73756273656374696F6E2E312E322E31},srcline={68}}{496E74726F64756374696F6E}
 \BKM@entry{id=14,dest={73756273656374696F6E2E312E322E32},srcline={86}}{4550446E6577206E6F7720616E6E6F7461746573205C28736F6D65206F665C2920796F7572206D757368726F6F6D7320616E6420766567657461626C6573}
 \BKM@entry{id=15,dest={73756273656374696F6E2E312E322E33},srcline={120}}{496E63726561736564206D617070696E6720707265636973696F6E20696E2068756D616E}
 \BKM@entry{id=16,dest={73756273656374696F6E2E312E322E34},srcline={132}}{496E746567726174696F6E206F66204550446E65772077697468206F74686572207265736F7572636573}
 \BKM@entry{id=17,dest={73756273656374696F6E2E312E322E35},srcline={138}}{436F6E636C7573696F6E73}
 \BKM@entry{id=18,dest={73756273656374696F6E2E312E322E36},srcline={142}}{4D6574686F6473}
 \BKM@entry{id=19,dest={73656374696F6E2E312E33},srcline={150}}{50574D5363616E}
 \BKM@entry{id=20,dest={73756273656374696F6E2E312E332E31},srcline={164}}{496E74726F64756374696F6E}
 \BKM@entry{id=21,dest={73756273656374696F6E2E312E332E32},srcline={191}}{4461746120616E64206D6574686F6473}
 \BKM@entry{id=22,dest={73756273656374696F6E2E312E332E33},srcline={217}}{42656E63686D61726B}
 \BKM@entry{id=23,dest={73756273656374696F6E2E312E332E34},srcline={287}}{436F6E636C7573696F6E73}
 \BKM@entry{id=24,dest={73656374696F6E2E312E34},srcline={297}}{535061722D4B}
 \BKM@entry{id=25,dest={73756273656374696F6E2E312E342E31},srcline={305}}{496E74726F64756374696F6E}
 \BKM@entry{id=26,dest={73756273656374696F6E2E312E342E32},srcline={318}}{4D6574686F6473}
 \BKM@entry{id=27,dest={73756273656374696F6E2E312E342E33},srcline={349}}{526573756C7473}
 \BKM@entry{id=28,dest={73756273656374696F6E2E312E342E34},srcline={356}}{436F6E636C7573696F6E}
 \@input{main/ch_encode_peaks.aux}
 \BKM@entry{id=29,dest={636861707465722E32},srcline={2}}{454E434F4445207065616B7320616E616C79736973}
 \BKM@entry{id=30,dest={636861707465722E32},srcline={5}}{454E434F4445207065616B7320616E616C79736973}
 \BKM@entry{id=31,dest={73656374696F6E2E322E31},srcline={22}}{44617461}
 \pgfsyspdfmark {pgfid18}{0}{40463552}
 \pgfsyspdfmark {pgfid17}{6}{40511883}
 \BKM@entry{id=32,dest={73656374696F6E2E322E32},srcline={45}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206368726F6D6174696E2061726368697465637475726573}
 \BKM@entry{id=33,dest={73756273656374696F6E2E322E322E31},srcline={64}}{44617461207265616C69676E6D656E74}
 \BKM@entry{id=34,dest={73656374696F6E2E322E33},srcline={76}}{4E75636C656F736F6D65206F7267616E697A6174696F6E2061726F756E64207472616E736372697074696F6E20666163746F722062696E64696E67207369746573}
 \BKM@entry{id=35,dest={73656374696F6E2E322E34},srcline={103}}{5468652063617365206F6620435443462C2052414432312C20534D43332C2059593120616E64205A4E46313433}
 \BKM@entry{id=36,dest={73656374696F6E2E322E35},srcline={141}}{4354434620616E64204A756E4420696E7465726163746F6D6573}
 \BKM@entry{id=37,dest={73656374696F6E2E322E36},srcline={230}}{454246312062696E6473206E75636C656F736F6D6573}
 \BKM@entry{id=38,dest={73656374696F6E2E322E37},srcline={267}}{4D6574686F6473}
 \BKM@entry{id=39,dest={73756273656374696F6E2E322E372E31},srcline={269}}{4461746120616E6420646174612070726F63657373696E67}
 \BKM@entry{id=40,dest={73756273656374696F6E2E322E372E32},srcline={282}}{436C617373696669636174696F6E206F66204D4E617365207061747465726E73}
 \BKM@entry{id=41,dest={73756273656374696F6E2E322E372E33},srcline={295}}{5175616E74696679696E67206E75636C656F736F6D6520617272617920696E74656E736974792066726F6D20636C617373696669636174696F6E20726573756C7473}
 \BKM@entry{id=42,dest={73756273656374696F6E2E322E372E34},srcline={324}}{5065616B20636F6C6F63616C697A6174696F6E}
 \BKM@entry{id=43,dest={73756273656374696F6E2E322E372E35},srcline={328}}{4E445220646574656374696F6E}
 \BKM@entry{id=44,dest={73756273656374696F6E2E322E372E36},srcline={420}}{4354434620616E64204A756E4420696E7465726163746F7273}
 \BKM@entry{id=45,dest={73756273656374696F6E2E322E372E37},srcline={432}}{4542463120616E64206E75636C656F736F6D65}
 \@input{main/ch_smile-seq.aux}
 \BKM@entry{id=46,dest={636861707465722E33},srcline={2}}{534D694C452D736571206461746120616E616C79736973}
 \BKM@entry{id=47,dest={636861707465722E33},srcline={5}}{534D694C452D736571206461746120616E616C79736973}
 \BKM@entry{id=48,dest={73756273656374696F6E2E332E302E31},srcline={19}}{496E74726F64756374696F6E}
 \pgfsyspdfmark {pgfid20}{0}{40463552}
 \pgfsyspdfmark {pgfid19}{6}{40511883}
 \BKM@entry{id=49,dest={73756273656374696F6E2E332E302E32},srcline={36}}{48696464656E204D61726B6F76204D6F64656C204D6F74696620646973636F76657279}
 \BKM@entry{id=50,dest={73756273656374696F6E2E332E302E33},srcline={61}}{42696E64696E67206D6F746966206576616C756174696F6E}
 \BKM@entry{id=51,dest={73756273656374696F6E2E332E302E34},srcline={115}}{526573756C7473}
 \BKM@entry{id=52,dest={73756273656374696F6E2E332E302E35},srcline={133}}{436F6E636C7573696F6E73}
 \@input{main/ch_atac-seq.aux}
 \BKM@entry{id=53,dest={636861707465722E34},srcline={2}}{4368726F6D6174696E206163636573736962696C697479206F66206D6F6E6F6379746573}
-\BKM@entry{id=54,dest={73656374696F6E2E342E31},srcline={14}}{415441432D736571}
+\BKM@entry{id=54,dest={73656374696F6E2E342E31},srcline={16}}{415441432D736571}
 \pgfsyspdfmark {pgfid22}{0}{40463552}
 \pgfsyspdfmark {pgfid21}{6}{40511883}
-\BKM@entry{id=55,dest={73656374696F6E2E342E32},srcline={31}}{4D6F6E69746F72696E672054462062696E64696E67}
-\BKM@entry{id=56,dest={73656374696F6E2E342E33},srcline={40}}{54686520616476656E74206F662073696E676C652063656C6C20444746}
-\BKM@entry{id=57,dest={73656374696F6E2E342E34},srcline={48}}{4120717569636B206F76657276696577206F66207363415441432D736571206461746120616E616C79736973}
-\BKM@entry{id=58,dest={73656374696F6E2E342E35},srcline={53}}{4F70656E207175657374696F6E73}
-\BKM@entry{id=59,dest={73656374696F6E2E342E36},srcline={67}}{44617461}
-\BKM@entry{id=60,dest={73656374696F6E2E342E37},srcline={78}}{4964656E74696669636174696F6E206F6620636174616C6F67206F66206368726F6D6174696E2061726368697465637475726573}
-\BKM@entry{id=61,dest={73756273656374696F6E2E342E372E31},srcline={83}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642072656164207061747465726E73}
-\BKM@entry{id=62,dest={73756273656374696F6E2E342E372E32},srcline={103}}{454D53657175656E6365203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E636573}
-\BKM@entry{id=63,dest={73756273656374696F6E2E342E372E33},srcline={193}}{454D4A6F696E74203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E63657320616E64206368726F6D6174696E2061726368697465637475726573}
-\BKM@entry{id=64,dest={73756273656374696F6E2E342E372E34},srcline={228}}{44617461207265616C69676E6D656E74}
-\BKM@entry{id=65,dest={73656374696F6E2E342E38},srcline={242}}{526573756C7473}
-\BKM@entry{id=66,dest={73756273656374696F6E2E342E382E31},srcline={246}}{467261676D656E742073697A6520616E616C79736973}
-\BKM@entry{id=67,dest={73756273656374696F6E2E342E382E32},srcline={275}}{4D6561737572696E67206F70656E206368726F6D6174696E20616E64206E75636C656F736F6D65206F63637570616E6379}
-\BKM@entry{id=68,dest={73756273656374696F6E2E342E382E33},srcline={305}}{4576616C756174696F6E206F6620454D53657175656E636520616E642043684950506172746974696F6E696E67}
-\BKM@entry{id=69,dest={73656374696F6E2E342E39},srcline={375}}{416C69676E696E67207468652062696E64696E67207369746573}
-\BKM@entry{id=70,dest={73656374696F6E2E342E3130},srcline={400}}{4578706C6F72696E6720696E646976696475616C20544620636C6173736573}
-\BKM@entry{id=71,dest={73656374696F6E2E342E3131},srcline={416}}{44697363757373696F6E73}
-\BKM@entry{id=72,dest={73656374696F6E2E342E3132},srcline={426}}{506572737065637469766573}
-\BKM@entry{id=73,dest={73656374696F6E2E342E3133},srcline={436}}{4D6574686F6473}
-\BKM@entry{id=74,dest={73756273656374696F6E2E342E31332E31},srcline={438}}{496D706C656D656E746174696F6E73}
-\BKM@entry{id=75,dest={73756273656374696F6E2E342E31332E32},srcline={453}}{467261676D656E7420636C6173736573}
-\BKM@entry{id=76,dest={73756273656374696F6E2E342E31332E33},srcline={467}}{53696D756C617465642073657175656E636573}
-\BKM@entry{id=77,dest={73756273656374696F6E2E342E31332E34},srcline={471}}{5265616C69676E6D656E74207573696E67204A4153504152206D6F74696673}
-\BKM@entry{id=78,dest={73756273656374696F6E2E342E31332E35},srcline={474}}{446973706C6179206F66206D6F746966206C6F676F}
-\BKM@entry{id=79,dest={73756273656374696F6E2E342E31332E36},srcline={477}}{4D6F64656C20657874656E73696F6E}
-\BKM@entry{id=80,dest={73756273656374696F6E2E342E31332E37},srcline={480}}{45787472616374696E6720646174612061737369676E656420746F206120636C617373}
-\@input{tail/appendix.aux}
-\BKM@entry{id=81,dest={617070656E6469782E41},srcline={5}}{416E20617070656E646978}
-\BKM@entry{id=82,dest={73656374696F6E2E412E31},srcline={7}}{537570706C656D656E746172792066696775726573}
+\BKM@entry{id=55,dest={73656374696F6E2E342E32},srcline={33}}{4D6F6E69746F72696E672054462062696E64696E67}
+\BKM@entry{id=56,dest={73656374696F6E2E342E33},srcline={42}}{54686520616476656E74206F662073696E676C652063656C6C20444746}
+\BKM@entry{id=57,dest={73656374696F6E2E342E34},srcline={69}}{4F70656E20697373756573}
+\BKM@entry{id=58,dest={73656374696F6E2E342E35},srcline={73}}{44617461}
+\BKM@entry{id=59,dest={73656374696F6E2E342E36},srcline={84}}{4964656E74696679696E67206F7665722D726570726573656E746564207369676E616C73}
+\BKM@entry{id=60,dest={73756273656374696F6E2E342E362E31},srcline={89}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642072656164207061747465726E73}
+\BKM@entry{id=61,dest={73756273656374696F6E2E342E362E32},srcline={101}}{454D53657175656E6365203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E636573}
+\BKM@entry{id=62,dest={73756273656374696F6E2E342E362E33},srcline={200}}{454D4A6F696E74203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E63657320616E64206368726F6D6174696E2061726368697465637475726573}
+\BKM@entry{id=63,dest={73756273656374696F6E2E342E362E34},srcline={235}}{44617461207265616C69676E6D656E74}
+\BKM@entry{id=64,dest={73656374696F6E2E342E37},srcline={249}}{526573756C7473}
+\BKM@entry{id=65,dest={73756273656374696F6E2E342E372E31},srcline={253}}{467261676D656E742073697A6520616E616C79736973}
+\BKM@entry{id=66,dest={73756273656374696F6E2E342E372E32},srcline={282}}{4D6561737572696E67206F70656E206368726F6D6174696E20616E64206E75636C656F736F6D65206F63637570616E6379}
+\BKM@entry{id=67,dest={73756273656374696F6E2E342E372E33},srcline={312}}{4576616C756174696F6E206F6620454D53657175656E636520616E642043684950506172746974696F6E696E67}
+\BKM@entry{id=68,dest={73656374696F6E2E342E38},srcline={382}}{416C69676E696E67207468652062696E64696E67207369746573}
+\BKM@entry{id=69,dest={73656374696F6E2E342E39},srcline={407}}{4578706C6F72696E6720696E646976696475616C20544620636C6173736573}
+\BKM@entry{id=70,dest={73656374696F6E2E342E3130},srcline={423}}{44697363757373696F6E73}
+\BKM@entry{id=71,dest={73656374696F6E2E342E3131},srcline={433}}{506572737065637469766573}
+\BKM@entry{id=72,dest={73656374696F6E2E342E3132},srcline={443}}{4D6574686F6473}
+\BKM@entry{id=73,dest={73756273656374696F6E2E342E31322E31},srcline={445}}{496D706C656D656E746174696F6E73}
+\BKM@entry{id=74,dest={73756273656374696F6E2E342E31322E32},srcline={460}}{467261676D656E7420636C6173736573}
+\BKM@entry{id=75,dest={73756273656374696F6E2E342E31322E33},srcline={474}}{53696D756C617465642073657175656E636573}
+\BKM@entry{id=76,dest={73756273656374696F6E2E342E31322E34},srcline={478}}{5265616C69676E6D656E74207573696E67204A4153504152206D6F74696673}
+\BKM@entry{id=77,dest={73756273656374696F6E2E342E31322E35},srcline={523}}{4D6F64656C20657874656E73696F6E}
+\BKM@entry{id=78,dest={73756273656374696F6E2E342E31322E36},srcline={526}}{45787472616374696E6720646174612061737369676E656420746F206120636C617373}
 \@writefile{toc}{\vspace  {\normalbaselineskip }}
+\@input{tail/appendix.aux}
+\BKM@entry{id=79,dest={617070656E6469782E41},srcline={5}}{416E20617070656E646978}
+\BKM@entry{id=80,dest={73656374696F6E2E412E31},srcline={7}}{537570706C656D656E746172792066696775726573}
 \pgfsyspdfmark {pgfid24}{0}{40463552}
 \pgfsyspdfmark {pgfid23}{6}{40511883}
 \@input{tail/biblio.aux}
-\BKM@entry{id=83,dest={73656374696F6E2A2E3632},srcline={3}}{4269626C696F677261706879}
+\BKM@entry{id=81,dest={73656374696F6E2A2E3632},srcline={3}}{4269626C696F677261706879}
 \pgfsyspdfmark {pgfid26}{0}{40463552}
 \pgfsyspdfmark {pgfid25}{6}{40498788}
-\BKM@entry{id=84,dest={617070656E6469782A2E3633},srcline={6}}{4269626C696F677261706879}
+\BKM@entry{id=82,dest={617070656E6469782A2E3633},srcline={6}}{4269626C696F677261706879}
 \@input{tail/cv.aux}
-\BKM@entry{id=85,dest={73656374696F6E2A2E3634},srcline={4}}{437572726963756C756D205669746165}
+\BKM@entry{id=83,dest={73656374696F6E2A2E3634},srcline={4}}{437572726963756C756D205669746165}
diff --git a/my_thesis.bbl b/my_thesis.bbl
index db40db0..e1b4142 100644
--- a/my_thesis.bbl
+++ b/my_thesis.bbl
@@ -1,685 +1,680 @@
 \begin{thebibliography}{}
 
 \bibitem[Adey et~al., 2010]{adey_rapid_2010}
 Adey, A., Morrison, H.~G., {Asan}, Xun, X., Kitzman, J.~O., Turner, E.~H.,
   Stackhouse, B., MacKenzie, A.~P., Caruccio, N.~C., Zhang, X., and Shendure,
   J. (2010).
 \newblock Rapid, low-input, low-bias construction of shotgun fragment libraries
   by high-density in vitro transposition.
 \newblock {\em Genome Biology}, 11(12):R119.
 
 \bibitem[Aerts et~al., 2003]{aerts_toucan:_2003}
 Aerts, S., Thijs, G., Coessens, B., Staes, M., Moreau, Y., and Moor, B.~D.
   (2003).
 \newblock Toucan: deciphering the cis ‐regulatory logic of coregulated genes.
 \newblock {\em Nucleic Acids Research}, 31(6):1753--1764.
 
 \bibitem[Aibar et~al., 2017]{aibar_scenic:_2017}
 Aibar, S., González-Blas, C.~B., Moerman, T., Huynh-Thu, V.~A., Imrichova, H.,
   Hulselmans, G., Rambow, F., Marine, J.-C., Geurts, P., Aerts, J., van~den
   Oord, J., Atak, Z.~K., Wouters, J., and Aerts, S. (2017).
 \newblock {SCENIC}: single-cell regulatory network inference and clustering.
 \newblock {\em Nature Methods}, 14(11):1083--1086.
 
 \bibitem[Alipanahi et~al., 2015]{alipanahi_predicting_2015}
 Alipanahi, B., Delong, A., Weirauch, M.~T., and Frey, B.~J. (2015).
 \newblock Predicting the sequence specificities of {DNA}- and {RNA}-binding
   proteins by deep learning.
 \newblock {\em Nature Biotechnology}, 33(8):831--838.
 
 \bibitem[Ambrosini et~al., 2016a]{ambrosini_chip-seq_2016}
 Ambrosini, G., Dreos, R., Kumar, S., and Bucher, P. (2016a).
 \newblock The {ChIP}-{Seq} tools and web server: a resource for analyzing
   {ChIP}-seq and other types of genomic data.
 \newblock {\em BMC Genomics}, 17:938.
 
 \bibitem[Ambrosini et~al., 2016b]{ambrosini_chip-seq_2016-1}
 Ambrosini, G., Dreos, R., Kumar, S., and Bucher, P. (2016b).
 \newblock The {ChIP}-{Seq} tools and web server: a resource for analyzing
   {ChIP}-seq and other types of genomic data.
 \newblock {\em BMC Genomics}, 17(1):938.
 
 \bibitem[Ambrosini et~al., 2018]{ambrosini_pwmscan:_2018}
 Ambrosini, G., Groux, R., and Bucher, P. (2018).
 \newblock {PWMScan}: a fast tool for scanning entire genomes with a
   position-specific weight matrix.
 \newblock {\em Bioinformatics}, 34(14):2483--2484.
 
 \bibitem[Ambrosini et~al., 2003]{ambrosini_signal_2003}
 Ambrosini, G., Praz, V., Jagannathan, V., and Bucher, P. (2003).
 \newblock Signal search analysis server.
 \newblock {\em Nucleic Acids Research}, 31(13):3618--3620.
 
 \bibitem[Angerer et~al., 2017]{angerer_single_2017}
 Angerer, P., Simon, L., Tritschler, S., Wolf, F.~A., Fischer, D., and Theis,
   F.~J. (2017).
 \newblock Single cells make big data: {New} challenges and opportunities in
   transcriptomics.
 \newblock {\em Current Opinion in Systems Biology}, 4:85--91.
 
 \bibitem[Bailey et~al., 2015]{bailey_znf143_2015}
 Bailey, S.~D., Zhang, X., Desai, K., Aid, M., Corradin, O., Cowper-Sal·lari,
   R., Akhtar-Zaidi, B., Scacheri, P.~C., Haibe-Kains, B., and Lupien, M.
   (2015).
 \newblock {ZNF}143 provides sequence specificity to secure chromatin
   interactions at gene promoters.
 \newblock {\em Nature Communications}, 2:6186.
 
 \bibitem[Bailey et~al., 2009]{bailey_meme_2009}
 Bailey, T.~L., Boden, M., Buske, F.~A., Frith, M., Grant, C.~E., Clementi, L.,
   Ren, J., Li, W.~W., and Noble, W.~S. (2009).
 \newblock {MEME} {Suite}: tools for motif discovery and searching.
 \newblock {\em Nucleic Acids Research}, 37(suppl\_2):W202--W208.
 
 \bibitem[Barrett et~al., 2011]{barrett_ncbi_2011}
 Barrett, T., Troup, D.~B., Wilhite, S.~E., Ledoux, P., Evangelista, C., Kim,
   I.~F., Tomashevsky, M., Marshall, K.~A., Phillippy, K.~H., Sherman, P.~M.,
   Muertter, R.~N., Holko, M., Ayanbule, O., Yefanov, A., and Soboleva, A.
   (2011).
 \newblock {NCBI} {GEO}: archive for functional genomics data sets—10 years
   on.
 \newblock {\em Nucleic Acids Research}, 39(suppl\_1):D1005--D1010.
 
 \bibitem[Barski et~al., 2007]{barski_high-resolution_2007}
 Barski, A., Cuddapah, S., Cui, K., Roh, T.-Y., Schones, D.~E., Wang, Z., Wei,
   G., Chepelev, I., and Zhao, K. (2007).
 \newblock High-{Resolution} {Profiling} of {Histone} {Methylations} in the
   {Human} {Genome}.
 \newblock {\em Cell}, 129(4):823--837.
 
 \bibitem[Beckstette et~al., 2006]{beckstette_fast_2006}
 Beckstette, M., Homann, R., Giegerich, R., and Kurtz, S. (2006).
 \newblock Fast index based algorithms and software for matching position
   specific scoring matrices.
 \newblock {\em BMC Bioinformatics}, 7:389.
 
 \bibitem[Berest et~al., 2018]{berest_quantification_2018}
 Berest, I., Arnold, C., Reyes-Palomares, A., Palla, G., Rasmussen, K.~D.,
   Helin, K., and Zaugg, J. (2018).
 \newblock Quantification of differential transcription factor activity and
   multiomics-based classification into activators and repressors: {diffTF}.
 \newblock {\em bioRxiv}.
 
 \bibitem[Berger and Bulyk, 2009]{berger_universal_2009}
 Berger, M.~F. and Bulyk, M.~L. (2009).
 \newblock Universal protein-binding microarrays for the comprehensive
   characterization of the {DNA}-binding specificities of transcription factors.
 \newblock {\em Nature Protocols}, 4(3):393--411.
 
 \bibitem[Boller et~al., 2018]{boller_defining_2018}
 Boller, S., Li, R., and Grosschedl, R. (2018).
 \newblock Defining {B} {Cell} {Chromatin}: {Lessons} from {EBF}1.
 \newblock {\em Trends in Genetics}, 34(4):257--269.
 
 \bibitem[Boller et~al., 2016]{boller_pioneering_2016}
 Boller, S., Ramamoorthy, S., Akbas, D., Nechanitzky, R., Burger, L., Murr, R.,
   Schübeler, D., and Grosschedl, R. (2016).
 \newblock Pioneering {Activity} of the {C}-{Terminal} {Domain} of {EBF}1
   {Shapes} the {Chromatin} {Landscape} for {B} {Cell} {Programming}.
 \newblock {\em Immunity}, 44(3):527--541.
 
 \bibitem[Boyle et~al., 2008]{boyle_high-resolution_2008}
 Boyle, A.~P., Davis, S., Shulha, H.~P., Meltzer, P., Margulies, E.~H., Weng,
   Z., Furey, T.~S., and Crawford, G.~E. (2008).
 \newblock High-{Resolution} {Mapping} and {Characterization} of {Open}
   {Chromatin} across the {Genome}.
 \newblock {\em Cell}, 132(2):311--322.
 
 \bibitem[Bucher and Trifonov, 1986]{bucher_compilation_1986}
 Bucher, P. and Trifonov, E.~N. (1986).
 \newblock Compilation and analysis of eukaryotic {POL} {II} promoter sequences.
 \newblock {\em Nucleic Acids Research}, 14(24):10009--10026.
 
 \bibitem[Buenrostro et~al., 2013]{buenrostro_transposition_2013}
 Buenrostro, J.~D., Giresi, P.~G., Zaba, L.~C., Chang, H.~Y., and Greenleaf,
   W.~J. (2013).
 \newblock Transposition of native chromatin for fast and sensitive epigenomic
   profiling of open chromatin, {DNA}-binding proteins and nucleosome position.
 \newblock {\em Nature Methods}, 10(12):1213--1218.
 
 \bibitem[Castro-Mondragon et~al., 2017]{castro-mondragon_rsat_2017}
 Castro-Mondragon, J.~A., Jaeger, S., Thieffry, D., Thomas-Chollier, M., and van
   Helden, J. (2017).
 \newblock {RSAT} matrix-clustering: dynamic exploration and redundancy
   reduction of transcription factor binding motif collections.
 \newblock {\em Nucleic Acids Research}, 45(13):e119--e119.
 
 \bibitem[Chatr-aryamontri et~al., 2017]{chatr-aryamontri_biogrid_2017}
 Chatr-aryamontri, A., Oughtred, R., Boucher, L., Rust, J., Chang, C., Kolas,
   N.~K., O'Donnell, L., Oster, S., Theesfeld, C., Sellam, A., Stark, C.,
   Breitkreutz, B.-J., Dolinski, K., and Tyers, M. (2017).
 \newblock The {BioGRID} interaction database: 2017 update.
 \newblock {\em Nucleic Acids Research}, 45(D1):D369--D379.
 
 \bibitem[Cheng et~al., 2012]{cheng_understanding_2012}
 Cheng, C., Alexander, R., Min, R., Leng, J., Yip, K.~Y., Rozowsky, J., Yan,
   K.-K., Dong, X., Djebali, S., Ruan, Y., Davis, C.~A., Carninci, P., Lassman,
   T., Gingeras, T.~R., Guigó, R., Birney, E., Weng, Z., Snyder, M., and
   Gerstein, M. (2012).
 \newblock Understanding transcriptional regulation by integrative analysis of
   transcription factor binding data.
 \newblock {\em Genome Research}, 22(9):1658--1667.
 
 \bibitem[Cirillo et~al., 2002]{cirillo_opening_2002}
 Cirillo, L.~A., Lin, F.~R., Cuesta, I., Friedman, D., Jarnik, M., and Zaret,
   K.~S. (2002).
 \newblock Opening of {Compacted} {Chromatin} by {Early} {Developmental}
   {Transcription} {Factors} {HNF}3 ({FoxA}) and {GATA}-4.
 \newblock {\em Molecular Cell}, 9(2):279--289.
 
 \bibitem[Consortium, 2012]{consortium_integrated_2012}
 Consortium, T. E.~P. (2012).
 \newblock An integrated encyclopedia of {DNA} elements in the human genome.
 \newblock {\em Nature}, 489(7414):57--74.
 
 \bibitem[Dalton et~al., 2009]{dalton_clustering_2009}
 Dalton, L., Ballarin, V., and Brun, M. (2009).
 \newblock Clustering {Algorithms}: {On} {Learning}, {Validation},
   {Performance}, and {Applications} to {Genomics}.
 \newblock {\em Current Genomics}, 10(6):430--445.
 
 \bibitem[Donohoe et~al., 2007]{donohoe_identification_2007}
 Donohoe, M.~E., Zhang, L.-F., Xu, N., Shi, Y., and Lee, J.~T. (2007).
 \newblock Identification of a {Ctcf} {Cofactor}, {Yy}1, for the {X}
   {Chromosome} {Binary} {Switch}.
 \newblock {\em Molecular Cell}, 25(1):43--56.
 
 \bibitem[Dreos et~al., 2013]{dreos_epd_2013}
 Dreos, R., Ambrosini, G., Cavin~Périer, R., and Bucher, P. (2013).
 \newblock {EPD} and {EPDnew}, high-quality promoter resources in the
   next-generation sequencing era.
 \newblock {\em Nucleic Acids Research}, 41(D1):D157--D164.
 
 \bibitem[Dreos et~al., 2017]{dreos_eukaryotic_2017}
 Dreos, R., Ambrosini, G., Groux, R., Cavin Périer, R., and Bucher, P. (2017).
 \newblock The eukaryotic promoter database in its 30th year: focus on
   non-vertebrate organisms.
 \newblock {\em Nucleic Acids Research}, 45(D1):D51--D55.
 
 \bibitem[Dreos et~al., 2018]{dreos_mga_2018}
 Dreos, R., Ambrosini, G., Groux, R., Périer, R.~C., and Bucher, P. (2018).
 \newblock {MGA} repository: a curated data resource for {ChIP}-seq and other
   genome annotated data.
 \newblock {\em Nucleic Acids Research}, 46(D1):D175--D180.
 
 \bibitem[Dreos et~al., 2015]{dreos_eukaryotic_2015}
 Dreos, R., Ambrosini, G., Périer, R.~C., and Bucher, P. (2015).
 \newblock The {Eukaryotic} {Promoter} {Database}: expansion of {EPDnew} and new
   promoter analysis tools.
 \newblock {\em Nucleic Acids Research}, 43(D1):D92--D96.
 
 \bibitem[Fan et~al., 2016]{fan_characterizing_2016}
 Fan, J., Salathia, N., Liu, R., Kaeser, G.~E., Yung, Y.~C., Herman, J.~L.,
   Kaper, F., Fan, J.-B., Zhang, K., Chun, J., and Kharchenko, P.~V. (2016).
 \newblock Characterizing transcriptional heterogeneity through pathway and gene
   set overdispersion analysis.
 \newblock {\em Nature Methods}, 13(3):241--244.
 
 \bibitem[Fu et~al., 2004]{fu_motifviz:_2004}
 Fu, Y., Frith, M.~C., Haverty, P.~M., and Weng, Z. (2004).
 \newblock {MotifViz}: an analysis and visualization tool for motif discovery.
 \newblock {\em Nucleic Acids Research}, 32(suppl\_2):W420--W423.
 
 \bibitem[Fu et~al., 2008]{fu_insulator_2008}
 Fu, Y., Sinha, M., Peterson, C.~L., and Weng, Z. (2008).
 \newblock The {Insulator} {Binding} {Protein} {CTCF} {Positions} 20
   {Nucleosomes} around {Its} {Binding} {Sites} across the {Human} {Genome}.
 \newblock {\em PLOS Genetics}, 4(7):e1000138.
 
 \bibitem[Gaffney et~al., 2012]{gaffney_controls_2012}
 Gaffney, D.~J., McVicker, G., Pai, A.~A., Fondufe-Mittendorf, Y.~N., Lewellen,
   N., Michelini, K., Widom, J., Gilad, Y., and Pritchard, J.~K. (2012).
 \newblock Controls of {Nucleosome} {Positioning} in the {Human} {Genome}.
 \newblock {\em PLoS Genet}, 8(11):e1003036.
 
 \bibitem[Gerstein et~al., 2012]{gerstein_architecture_2012}
 Gerstein, M.~B., Kundaje, A., Hariharan, M., Landt, S.~G., Yan, K.-K., Cheng,
   C., Mu, X.~J., Khurana, E., Rozowsky, J., Alexander, R., Min, R., Alves, P.,
   Abyzov, A., Addleman, N., Bhardwaj, N., Boyle, A.~P., Cayting, P., Charos,
   A., Chen, D.~Z., Cheng, Y., Clarke, D., Eastman, C., Euskirchen, G., Frietze,
   S., Fu, Y., Gertz, J., Grubert, F., Harmanci, A., Jain, P., Kasowski, M.,
   Lacroute, P., Leng, J., Lian, J., Monahan, H., O’Geen, H., Ouyang, Z.,
   Partridge, E.~C., Patacsil, D., Pauli, F., Raha, D., Ramirez, L., Reddy,
   T.~E., Reed, B., Shi, M., Slifer, T., Wang, J., Wu, L., Yang, X., Yip, K.~Y.,
   Zilberman-Schapira, G., Batzoglou, S., Sidow, A., Farnham, P.~J., Myers,
   R.~M., Weissman, S.~M., and Snyder, M. (2012).
 \newblock Architecture of the human regulatory network derived from {ENCODE}
   data.
 \newblock {\em Nature}, 489(7414):91--100.
 
 \bibitem[Ghirlando and Felsenfeld, 2016]{ghirlando_ctcf:_2016}
 Ghirlando, R. and Felsenfeld, G. (2016).
 \newblock {CTCF}: making the right connections.
 \newblock {\em Genes \& Development}, 30(8):881--891.
 
 \bibitem[González-Blas et~al., 2019]{gonzalez-blas_cistopic:_2019}
 González-Blas, C.~B., Minnoye, L., Papasokrati, D., Aibar, S., Hulselmans, G.,
   Christiaens, V., Davie, K., Wouters, J., and Aerts, S. (2019).
 \newblock {cisTopic}: cis-regulatory topic modeling on single-cell {ATAC}-seq
   data.
 \newblock {\em Nature Methods}, 16(5):397.
 
 \bibitem[Grant et~al., 2011]{grant_fimo:_2011}
 Grant, C.~E., Bailey, T.~L., and Noble, W.~S. (2011).
 \newblock {FIMO}: scanning for occurrences of a given motif.
 \newblock {\em Bioinformatics}, 27(7):1017--1018.
 
 \bibitem[Grossman et~al., 2018]{grossman_positional_2018}
 Grossman, S.~R., Engreitz, J., Ray, J.~P., Nguyen, T.~H., Hacohen, N., and
   Lander, E.~S. (2018).
 \newblock Positional specificity of different transcription factor classes
   within enhancers.
 \newblock {\em Proceedings of the National Academy of Sciences},
   115(30):E7222--E7230.
 
 \bibitem[Groux and Bucher, 2019]{groux_spar-k:_2019}
 Groux, R. and Bucher, P. (2019).
 \newblock {SPar}-{K}: a method to partition {NGS} signal data.
 \newblock {\em Bioinformatics}.
 
 \bibitem[Guo et~al., 2012]{guo_high_2012}
 Guo, Y., Mahony, S., and Gifford, D.~K. (2012).
 \newblock High {Resolution} {Genome} {Wide} {Binding} {Event} {Finding} and
   {Motif} {Discovery} {Reveals} {Transcription} {Factor} {Spatial} {Binding}
   {Constraints}.
 \newblock {\em PLOS Computational Biology}, 8(8):e1002638.
 
 \bibitem[Hagman and Lukin, 2005]{hagman_early_2005}
 Hagman, J. and Lukin, K. (2005).
 \newblock Early {B}-cell factor ‘pioneers’ the way for {B}-cell
   development.
 \newblock {\em Trends in Immunology}, 26(9):455--461.
 
 \bibitem[Heinz et~al., 2010]{heinz_simple_2010}
 Heinz, S., Benner, C., Spann, N., Bertolino, E., Lin, Y.~C., Laslo, P., Cheng,
   J.~X., Murre, C., Singh, H., and Glass, C.~K. (2010).
 \newblock Simple {Combinations} of {Lineage}-{Determining} {Transcription}
   {Factors} {Prime} cis-{Regulatory} {Elements} {Required} for {Macrophage} and
   {B} {Cell} {Identities}.
 \newblock {\em Molecular Cell}, 38(4):576--589.
 
-\bibitem[Hepler, 2018]{hepler_10x_2018}
-Hepler, L. (2018).
-\newblock 10x {Genomics} takes gene imaging and analysis tools to the big
-  leagues.
-
 \bibitem[Hertz et~al., 1990]{hertz_identification_1990}
 Hertz, G.~Z., Hartzell, G.~W., and Stormo, G.~D. (1990).
 \newblock Identification of consensus patterns in unaligned {DNA} sequences
   known to be functionally related.
 \newblock {\em Computer applications in the biosciences: CABIOS}, 6(2):81--92.
 
 \bibitem[Hon et~al., 2008]{hon_chromasig:_2008}
 Hon, G., Ren, B., and Wang, W. (2008).
 \newblock {ChromaSig}: {A} {Probabilistic} {Approach} to {Finding} {Common}
   {Chromatin} {Signatures} in the {Human} {Genome}.
 \newblock {\em PLOS Computational Biology}, 4(10):e1000201.
 
 \bibitem[Ioshikhes et~al., 2011]{ioshikhes_variety_2011}
 Ioshikhes, I., Hosid, S., and Pugh, B.~F. (2011).
 \newblock Variety of genomic {DNA} patterns for nucleosome positioning.
 \newblock {\em Genome Research}, 21(11):1863--1871.
 
 \bibitem[Isakova et~al., 2017]{isakova_smile-seq_2017}
 Isakova, A., Groux, R., Imbeault, M., Rainer, P., Alpern, D., Dainese, R.,
   Ambrosini, G., Trono, D., Bucher, P., and Deplancke, B. (2017).
 \newblock {SMiLE}-seq identifies binding motifs of single and dimeric
   transcription factors.
 \newblock {\em Nature Methods}, advance online publication.
 
 \bibitem[Jolma et~al., 2010]{jolma_multiplexed_2010}
 Jolma, A., Kivioja, T., Toivonen, J., Cheng, L., Wei, G., Enge, M., Taipale,
   M., Vaquerizas, J.~M., Yan, J., Sillanpää, M.~J., Bonke, M., Palin, K.,
   Talukder, S., Hughes, T.~R., Luscombe, N.~M., Ukkonen, E., and Taipale, J.
   (2010).
 \newblock Multiplexed massively parallel {SELEX} for characterization of human
   transcription factor binding specificities.
 \newblock {\em Genome Research}, 20(6):861--873.
 
 \bibitem[Jolma et~al., 2013]{jolma_dna-binding_2013}
 Jolma, A., Yan, J., Whitington, T., Toivonen, J., Nitta, K., Rastas, P.,
   Morgunova, E., Enge, M., Taipale, M., Wei, G., Palin, K., Vaquerizas, J.,
   Vincentelli, R., Luscombe, N., Hughes, T., Lemaire, P., Ukkonen, E., Kivioja,
   T., and Taipale, J. (2013).
 \newblock {DNA}-{Binding} {Specificities} of {Human} {Transcription} {Factors}.
 \newblock {\em Cell}, 152(1–2):327--339.
 
 \bibitem[Kent, 2002]{kent_blatblast-like_2002}
 Kent, W.~J. (2002).
 \newblock {BLAT}—{The} {BLAST}-{Like} {Alignment} {Tool}.
 \newblock {\em Genome Research}, 12(4):656--664.
 
 \bibitem[Khan et~al., 2018]{khan_jaspar_2018}
 Khan, A., Fornes, O., Stigliani, A., Gheorghe, M., Castro-Mondragon, J.~A., van
   der Lee, R., Bessy, A., Chèneby, J., Kulkarni, S.~R., Tan, G., Baranasic,
   D., Arenillas, D.~J., Sandelin, A., Vandepoele, K., Lenhard, B., Ballester,
   B., Wasserman, W.~W., Parcy, F., and Mathelier, A. (2018).
 \newblock {JASPAR} 2018: update of the open-access database of transcription
   factor binding profiles and its web framework.
 \newblock {\em Nucleic Acids Research}, 46(D1):D260--D266.
 
 \bibitem[Kiselev et~al., 2017]{kiselev_sc3:_2017}
 Kiselev, V.~Y., Kirschner, K., Schaub, M.~T., Andrews, T., Yiu, A., Chandra,
   T., Natarajan, K.~N., Reik, W., Barahona, M., Green, A.~R., and Hemberg, M.
   (2017).
 \newblock {SC}3: consensus clustering of single-cell {RNA}-seq data.
 \newblock {\em Nature Methods}, 14(5):483--486.
 
 \bibitem[Kulakovskiy et~al., 2018]{kulakovskiy_hocomoco:_2018}
 Kulakovskiy, I.~V., Vorontsov, I.~E., Yevshin, I.~S., Sharipov, R.~N.,
   Fedorova, A.~D., Rumynskiy, E.~I., Medvedeva, Y.~A., Magana-Mora, A., Bajic,
   V.~B., Papatsenko, D.~A., Kolpakov, F.~A., and Makeev, V.~J. (2018).
 \newblock {HOCOMOCO}: towards a complete collection of transcription factor
   binding models for human and mouse via large-scale {ChIP}-{Seq} analysis.
 \newblock {\em Nucleic Acids Research}, 46(D1):D252--D259.
 
 \bibitem[Kulakovskiy et~al., 2016]{kulakovskiy_hocomoco:_2016}
 Kulakovskiy, I.~V., Vorontsov, I.~E., Yevshin, I.~S., Soboleva, A.~V.,
   Kasianov, A.~S., Ashoor, H., Ba-alawi, W., Bajic, V.~B., Medvedeva, Y.~A.,
   Kolpakov, F.~A., and Makeev, V.~J. (2016).
 \newblock {HOCOMOCO}: expansion and enhancement of the collection of
   transcription factor binding sites models.
 \newblock {\em Nucleic Acids Research}, 44(D1):D116--D125.
 
 \bibitem[Kundaje et~al., 2012]{kundaje_ubiquitous_2012}
 Kundaje, A., Kyriazopoulou-Panagiotopoulou, S., Libbrecht, M., Smith, C.~L.,
   Raha, D., Winters, E.~E., Johnson, S.~M., Snyder, M., Batzoglou, S., and
   Sidow, A. (2012).
 \newblock Ubiquitous heterogeneity and asymmetry of the chromatin environment
   at regulatory elements.
 \newblock {\em Genome Research}, 22(9):1735--1747.
 
 \bibitem[Kurotaki et~al., 2017]{kurotaki_transcriptional_2017}
 Kurotaki, D., Sasaki, H., and Tamura, T. (2017).
 \newblock Transcriptional control of monocyte and macrophage development.
 \newblock {\em International Immunology}, 29(3):97--107.
 
 \bibitem[Langmead and Salzberg, 2012]{langmead_fast_2012}
 Langmead, B. and Salzberg, S.~L. (2012).
 \newblock Fast gapped-read alignment with {Bowtie} 2.
 \newblock {\em Nature Methods}, 9(4):357--359.
 
 \bibitem[Langmead et~al., 2009]{langmead_ultrafast_2009}
 Langmead, B., Trapnell, C., Pop, M., and Salzberg, S.~L. (2009).
 \newblock Ultrafast and memory-efficient alignment of short {DNA} sequences to
   the human genome.
 \newblock {\em Genome Biology}, 10(3):R25.
 
 \bibitem[Li et~al., 2009]{li_sequence_2009}
 Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., Marth,
   G., Abecasis, G., and Durbin, R. (2009).
 \newblock The {Sequence} {Alignment}/{Map} format and {SAMtools}.
 \newblock {\em Bioinformatics}, 25(16):2078--2079.
 
 \bibitem[Li et~al., 2019]{li_identification_2019}
 Li, Z., Schulz, M.~H., Look, T., Begemann, M., Zenke, M., and Costa, I.~G.
   (2019).
 \newblock Identification of transcription factor binding sites using
   {ATAC}-seq.
 \newblock {\em Genome Biology}, 20(1):45.
 
 \bibitem[Lizio et~al., 2015]{lizio_gateways_2015}
 Lizio, M., Harshbarger, J., Shimoji, H., Severin, J., Kasukawa, T., Sahin, S.,
   Abugessaisa, I., Fukuda, S., Hori, F., Ishikawa-Kato, S., Mungall, C.~J.,
   Arner, E., Baillie, J.~K., Bertin, N., Bono, H., de~Hoon, M., Diehl, A.~D.,
   Dimont, E., Freeman, T.~C., Fujieda, K., Hide, W., Kaliyaperumal, R.,
   Katayama, T., Lassmann, T., Meehan, T.~F., Nishikata, K., Ono, H., Rehli, M.,
   Sandelin, A., Schultes, E.~A., ‘t Hoen, P.~A., Tatum, Z., Thompson, M.,
   Toyoda, T., Wright, D.~W., Daub, C.~O., Itoh, M., Carninci, P., Hayashizaki,
   Y., Forrest, A.~R., Kawaji, H., and {the FANTOM consortium} (2015).
 \newblock Gateways to the {FANTOM}5 promoter level mammalian expression atlas.
 \newblock {\em Genome Biology}, 16(1):22.
 
 \bibitem[Losada, 2014]{losada_cohesin_2014}
 Losada, A. (2014).
 \newblock Cohesin in cancer: chromosome segregation and beyond.
 \newblock {\em Nature Reviews Cancer}, 14(6):389--393.
 
 \bibitem[Maerkl and Quake, 2007]{maerkl_systems_2007}
 Maerkl, S.~J. and Quake, S.~R. (2007).
 \newblock A {Systems} {Approach} to {Measuring} the {Binding} {Energy}
   {Landscapes} of {Transcription} {Factors}.
 \newblock {\em Science}, 315(5809):233--237.
 
 \bibitem[Maier et~al., 2004]{maier_early_2004}
 Maier, H., Ostraat, R., Gao, H., Fields, S., Shinton, S.~A., Medina, K.~L.,
   Ikawa, T., Murre, C., Singh, H., Hardy, R.~R., and Hagman, J. (2004).
 \newblock Early {B} cell factor cooperates with {Runx}1 and mediates epigenetic
   changes associated with mb-1 transcription.
 \newblock {\em Nature Immunology}, 5(10):1069--1077.
 
 \bibitem[Marsland, 2015]{marsland_machine_2015-1}
 Marsland, S. (2015).
 \newblock {\em Machine {Learning}, {An} algorithmic {Perspective}, {Chapter} 7
   {Probabilistic} {Learning}}.
 \newblock CRC Press, Boca Raton, second edition edition.
 
 \bibitem[Mathelier et~al., 2014]{mathelier_jaspar_2014}
 Mathelier, A., Zhao, X., Zhang, A.~W., Parcy, F., Worsley-Hunt, R., Arenillas,
   D.~J., Buchman, S., Chen, C.-y., Chou, A., Ienasescu, H., Lim, J., Shyr, C.,
   Tan, G., Zhou, M., Lenhard, B., Sandelin, A., and Wasserman, W.~W. (2014).
 \newblock {JASPAR} 2014: an extensively expanded and updated open-access
   database of transcription factor binding profiles.
 \newblock {\em Nucleic Acids Research}, 42(D1):D142--D147.
 
 \bibitem[Nair et~al., 2014]{nair_probabilistic_2014}
 Nair, N.~U., Kumar, S., Moret, B. M.~E., and Bucher, P. (2014).
 \newblock Probabilistic partitioning methods to find significant patterns in
   {ChIP}-{Seq} data.
 \newblock {\em Bioinformatics}, 30(17):2406--2413.
 
 \bibitem[Neph et~al., 2012]{neph_expansive_2012}
 Neph, S., Vierstra, J., Stergachis, A.~B., Reynolds, A.~P., Haugen, E., Vernot,
   B., Thurman, R.~E., John, S., Sandstrom, R., Johnson, A.~K., Maurano, M.~T.,
   Humbert, R., Rynes, E., Wang, H., Vong, S., Lee, K., Bates, D., Diegel, M.,
   Roach, V., Dunn, D., Neri, J., Schafer, A., Hansen, R.~S., Kutyavin, T.,
   Giste, E., Weaver, M., Canfield, T., Sabo, P., Zhang, M., Balasundaram, G.,
   Byron, R., MacCoss, M.~J., Akey, J.~M., Bender, M.~A., Groudine, M., Kaul,
   R., and Stamatoyannopoulos, J.~A. (2012).
 \newblock An expansive human regulatory lexicon encoded in transcription factor
   footprints.
 \newblock {\em Nature}, 489(7414):83--90.
 
 \bibitem[Nielsen et~al., 2012]{nielsen_catchprofiles:_2012}
 Nielsen, F. G.~G., Markus, K.~G., Friborg, R.~M., Favrholdt, L.~M.,
   Stunnenberg, H.~G., and Huynen, M. (2012).
 \newblock {CATCHprofiles}: {Clustering} and {Alignment} {Tool} for {ChIP}
   {Profiles}.
 \newblock {\em PLOS ONE}, 7(1):e28272.
 
 \bibitem[Ong and Corces, 2014]{ong_ctcf:_2014}
 Ong, C.-T. and Corces, V.~G. (2014).
 \newblock {CTCF}: an architectural protein bridging genome topology and
   function.
 \newblock {\em Nature Reviews Genetics}, 15(4):234--246.
 
 \bibitem[Orenstein and Shamir, 2014]{orenstein_comparative_2014}
 Orenstein, Y. and Shamir, R. (2014).
 \newblock A comparative analysis of transcription factor binding models learned
   from {PBM}, {HT}-{SELEX} and {ChIP} data.
 \newblock {\em Nucleic Acids Research}, 42(8):e63--e63.
 
 \bibitem[Ou et~al., 2018]{ou_motifstack_2018}
 Ou, J., Wolfe, S.~A., Brodsky, M.~H., and Zhu, L.~J. (2018).
 \newblock {motifStack} for the analysis of transcription factor binding site
   evolution.
 \newblock {\em Nature Methods}, 15(1):8--9.
 
 \bibitem[Pizzi and Ukkonen, 2008]{pizzi_fast_2008}
 Pizzi, C. and Ukkonen, E. (2008).
 \newblock Fast profile matching algorithms — {A} survey.
 \newblock {\em Theoretical Computer Science}, 395(2):137--157.
 
 \bibitem[Pollard et~al., 2010]{pollard_detection_2010}
 Pollard, K.~S., Hubisz, M.~J., Rosenbloom, K.~R., and Siepel, A. (2010).
 \newblock Detection of nonneutral substitution rates on mammalian phylogenies.
 \newblock {\em Genome Research}, 20(1):110--121.
 
 \bibitem[Quinlan and Hall, 2010]{quinlan_bedtools:_2010}
 Quinlan, A.~R. and Hall, I.~M. (2010).
 \newblock {BEDTools}: a flexible suite of utilities for comparing genomic
   features.
 \newblock {\em Bioinformatics}, 26(6):841--842.
 
 \bibitem[Raney et~al., 2014]{raney_track_2014}
 Raney, B.~J., Dreszer, T.~R., Barber, G.~P., Clawson, H., Fujita, P.~A., Wang,
   T., Nguyen, N., Paten, B., Zweig, A.~S., Karolchik, D., and Kent, W.~J.
   (2014).
 \newblock Track data hubs enable visualization of user-defined genome-wide
   annotations on the {UCSC} {Genome} {Browser}.
 \newblock {\em Bioinformatics}, 30(7):1003--1005.
 
 \bibitem[Rico et~al., 2017]{rico_comparative_2017}
 Rico, D., Martens, J.~H., Downes, K., Carrillo-de Santa-Pau, E., Pancaldi, V.,
   Breschi, A., Richardson, D., Heath, S., Saeed, S., Frontini, M., Chen, L.,
   Watt, S., Müller, F., Clarke, L., Kerstens, H.~H., Wilder, S.~P., Palumbo,
   E., Djebali, S., Raineri, E., Merkel, A., Esteve-Codina, A., Sultan, M.,
   Bommel, A.~v., Gut, M., Yaspo, M.-L., Rubio, M., Fernandez, J.~M., Attwood,
   A., Torre, V. d.~l., Royo, R., Fragkogianni, S., Gelpí, J.~L., Torrents, D.,
   Iotchkova, V., Logie, C., Aghajanirefah, A., Singh, A.~A., Janssen-Megens,
   E.~M., Berentsen, K., Erber, W., Rendon, A., Kostadima, M., Loos, R., Ent, M.
   A. v.~d., Kaan, A., Sharifi, N., Paul, D.~S., Ifrim, D.~C., Quintin, J.,
   Love, M.~I., Pisano, D.~G., Burden, F., Foad, N., Farrow, S., Zerbino, D.~R.,
   Dunham, I., Kuijpers, T., Lehrach, H., Lengauer, T., Bertone, P., Netea,
   M.~G., Vingron, M., Beck, S., Flicek, P., Gut, I., Ouwehand, W.~H., Bock, C.,
   Soranzo, N., Guigo, R., Valencia, A., and Stunnenberg, H.~G. (2017).
 \newblock Comparative analysis of neutrophil and monocyte epigenomes.
 \newblock {\em bioRxiv}, page 237784.
 
 \bibitem[{Roadmap Epigenomics Consortium} et~al.,
   2015]{roadmap_epigenomics_consortium_integrative_2015}
 {Roadmap Epigenomics Consortium}, Kundaje, A., Meuleman, W., Ernst, J.,
   Bilenky, M., Yen, A., Heravi-Moussavi, A., Kheradpour, P., Zhang, Z., Wang,
   J., Ziller, M.~J., Amin, V., Whitaker, J.~W., Schultz, M.~D., Ward, L.~D.,
   Sarkar, A., Quon, G., Sandstrom, R.~S., Eaton, M.~L., Wu, Y.-C., Pfenning,
   A.~R., Wang, X., Claussnitzer, M., {Yaping Liu}, Coarfa, C., Alan~Harris, R.,
   Shoresh, N., Epstein, C.~B., Gjoneska, E., Leung, D., Xie, W., David~Hawkins,
   R., Lister, R., Hong, C., Gascard, P., Mungall, A.~J., Moore, R., Chuah, E.,
   Tam, A., Canfield, T.~K., Scott~Hansen, R., Kaul, R., Sabo, P.~J., Bansal,
   M.~S., Carles, A., Dixon, J.~R., Farh, K.-H., Feizi, S., Karlic, R., Kim,
   A.-R., Kulkarni, A., Li, D., Lowdon, R., Elliott, G., Mercer, T.~R., Neph,
   S.~J., Onuchic, V., Polak, P., Rajagopal, N., Ray, P., Sallari, R.~C.,
   Siebenthall, K.~T., Sinnott-Armstrong, N.~A., Stevens, M., Thurman, R.~E.,
   Wu, J., Zhang, B., Zhou, X., Beaudet, A.~E., Boyer, L.~A., Jager, P. L.~D.,
   Farnham, P.~J., Fisher, S.~J., Haussler, D., Jones, S. J.~M., Li, W., Marra,
   M.~A., McManus, M.~T., Sunyaev, S., Thomson, J.~A., Tlsty, T.~D., Tsai,
   L.-H., Wang, W., Waterland, R.~A., Zhang, M.~Q., Chadwick, L.~H., Bernstein,
   B.~E., Costello, J.~F., Ecker, J.~R., Hirst, M., Meissner, A., Milosavljevic,
   A., Ren, B., Stamatoyannopoulos, J.~A., Wang, T., and Kellis, M. (2015).
 \newblock Integrative analysis of 111 reference human epigenomes.
 \newblock {\em Nature}, 518(7539):317--330.
 
 \bibitem[Rustici et~al., 2013]{rustici_arrayexpress_2013}
 Rustici, G., Kolesnikov, N., Brandizi, M., Burdett, T., Dylag, M., Emam, I.,
   Farne, A., Hastings, E., Ison, J., Keays, M., Kurbatova, N., Malone, J.,
   Mani, R., Mupo, A., Pedro~Pereira, R., Pilicheva, E., Rung, J., Sharma, A.,
   Tang, Y.~A., Ternent, T., Tikhonov, A., Welter, D., Williams, E., Brazma, A.,
   Parkinson, H., and Sarkans, U. (2013).
 \newblock {ArrayExpress} update—trends in database growth and links to data
   analysis tools.
 \newblock {\em Nucleic Acids Research}, 41(D1):D987--D990.
 
 \bibitem[Schones et~al., 2007]{schones_statistical_2007}
 Schones, D.~E., Smith, A.~D., and Zhang, M.~Q. (2007).
 \newblock Statistical significance of cis-regulatory modules.
 \newblock {\em BMC Bioinformatics}, 8(1):19.
 
 \bibitem[Schütz and Delorenzi, 2008]{schutz_mamot:_2008}
 Schütz, F. and Delorenzi, M. (2008).
 \newblock {MAMOT}: hidden {Markov} modeling tool.
 \newblock {\em Bioinformatics}, 24(11):1399--1400.
 
 \bibitem[Siepel et~al., 2005]{siepel_evolutionarily_2005}
 Siepel, A., Bejerano, G., Pedersen, J.~S., Hinrichs, A.~S., Hou, M.,
   Rosenbloom, K., Clawson, H., Spieth, J., Hillier, L.~W., Richards, S.,
   Weinstock, G.~M., Wilson, R.~K., Gibbs, R.~A., Kent, W.~J., Miller, W., and
   Haussler, D. (2005).
 \newblock Evolutionarily conserved elements in vertebrate, insect, worm, and
   yeast genomes.
 \newblock {\em Genome Research}, 15(8):1034--1050.
 
 \bibitem[Soufi et~al., 2015]{soufi_pioneer_2015}
 Soufi, A., Garcia, M.~F., Jaroszewicz, A., Osman, N., Pellegrini, M., and
   Zaret, K.~S. (2015).
 \newblock Pioneer {Transcription} {Factors} {Target} {Partial} {DNA} {Motifs}
   on {Nucleosomes} to {Initiate} {Reprogramming}.
 \newblock {\em Cell}, 161(3):555--568.
 
 \bibitem[Stedman et~al., 2008]{stedman_cohesins_2008}
 Stedman, W., Kang, H., Lin, S., Kissil, J.~L., Bartolomei, M.~S., and
   Lieberman, P.~M. (2008).
 \newblock Cohesins localize with {CTCF} at the {KSHV} latency control region
   and at cellular c-myc and {H}19 {Igf}2 insulators.
 \newblock {\em The EMBO Journal}, 27(4):654--666.
 
 \bibitem[Trifonov, 2011]{trifonov_cracking_2011}
 Trifonov, E.~N. (2011).
 \newblock Cracking the chromatin code: {Precise} rule of nucleosome
   positioning.
 \newblock {\em Physics of Life Reviews}, 8(1):39--50.
 
 \bibitem[Turatsinze et~al., 2008]{turatsinze_using_2008}
 Turatsinze, J.-V., Thomas-Chollier, M., Defrance, M., and Helden, J.~v. (2008).
 \newblock Using {RSAT} to scan genome sequences for transcription factor
   binding sites and cis -regulatory modules.
 \newblock {\em Nature Protocols}, 3(10):1578--1588.
 
 \bibitem[Vierstra and Stamatoyannopoulos, 2016]{vierstra_genomic_2016}
 Vierstra, J. and Stamatoyannopoulos, J.~A. (2016).
 \newblock Genomic footprinting.
 \newblock {\em Nature Methods}, 13(3):213--221.
 
 \bibitem[Voss and Hager, 2014]{voss_dynamic_2014}
 Voss, T.~C. and Hager, G.~L. (2014).
 \newblock Dynamic regulation of transcriptional states by chromatin and
   transcription factors.
 \newblock {\em Nature Reviews Genetics}, 15(2):69--81.
 
 \bibitem[Wang et~al., 2012]{wang_sequence_2012}
 Wang, J., Zhuang, J., Iyer, S., Lin, X., Whitfield, T.~W., Greven, M.~C.,
   Pierce, B.~G., Dong, X., Kundaje, A., Cheng, Y., Rando, O.~J., Birney, E.,
   Myers, R.~M., Noble, W.~S., Snyder, M., and Weng, Z. (2012).
 \newblock Sequence features and chromatin structure around the genomic regions
   bound by 119 human transcription factors.
 \newblock {\em Genome Research}, 22(9):1798--1812.
 
 \bibitem[Weirauch et~al., 2013]{weirauch_evaluation_2013}
 Weirauch, M.~T., Cote, A., Norel, R., Annala, M., Zhao, Y., Riley, T.~R.,
   Saez-Rodriguez, J., Cokelaer, T., Vedenko, A., Talukder, S., {Dream5
   Consortium}, Bussemaker, H.~J., Morris, Q.~D., Bulyk, M.~L., Stolovitzky, G.,
   and Hughes, T.~R. (2013).
 \newblock Evaluation of methods for modeling transcription factor sequence
   specificity.
 \newblock {\em Nature Biotechnology}, 31(2):126--134.
 
 \bibitem[Wu et~al., 2016]{wu_biogps:_2016}
 Wu, C., Jin, X., Tsueng, G., Afrasiabi, C., and Su, A.~I. (2016).
 \newblock {BioGPS}: building your own mash-up of gene annotations and
   expression profiles.
 \newblock {\em Nucleic Acids Research}, 44(D1):D313--D316.
 
 \bibitem[Zaret and Carroll, 2011]{zaret_pioneer_2011}
 Zaret, K.~S. and Carroll, J.~S. (2011).
 \newblock Pioneer transcription factors: establishing competence for gene
   expression.
 \newblock {\em Genes \& Development}, 25(21):2227--2241.
 
 \bibitem[Zhang et~al., 2014]{zhang_canonical_2014}
 Zhang, Y., Vastenhouw, N.~L., Feng, J., Fu, K., Wang, C., Ge, Y., Pauli, A.,
   Hummelen, P.~v., Schier, A.~F., and Liu, X.~S. (2014).
 \newblock Canonical nucleosome organization at promoters forms during genome
   activation.
 \newblock {\em Genome Research}, 24(2):260--266.
 
 \bibitem[Zhao et~al., 2005]{zhao_tred:_2005}
 Zhao, F., Xuan, Z., Liu, L., and Zhang, M.~Q. (2005).
 \newblock {TRED}: a {Transcriptional} {Regulatory} {Element} {Database} and a
   platform for in silico gene regulation studies.
 \newblock {\em Nucleic Acids Research}, 33(suppl\_1):D103--D107.
 
 \bibitem[Zhao et~al., 2009]{zhao_inferring_2009}
 Zhao, Y., Granas, D., and Stormo, G.~D. (2009).
 \newblock Inferring {Binding} {Energies} from {Selected} {Binding} {Sites}.
 \newblock {\em PLOS Comput Biol}, 5(12):e1000590.
 
 \end{thebibliography}
diff --git a/my_thesis.blg b/my_thesis.blg
index a7e7314..682ba77 100644
--- a/my_thesis.blg
+++ b/my_thesis.blg
@@ -1,58 +1,58 @@
 This is BibTeX, Version 0.99d (TeX Live 2017/Debian)
 Capacity: max_strings=100000, hash_size=100000, hash_prime=85009
 The top-level auxiliary file: my_thesis.aux
 A level-1 auxiliary file: head/dedication.aux
 A level-1 auxiliary file: head/acknowledgements.aux
 A level-1 auxiliary file: head/preface.aux
 A level-1 auxiliary file: head/abstracts.aux
 A level-1 auxiliary file: main/ch_introduction.aux
 A level-1 auxiliary file: main/ch_group_projects.aux
 A level-1 auxiliary file: main/ch_encode_peaks.aux
 A level-1 auxiliary file: main/ch_smile-seq.aux
 A level-1 auxiliary file: main/ch_atac-seq.aux
 A level-1 auxiliary file: tail/appendix.aux
 A level-1 auxiliary file: tail/biblio.aux
 The style file: apalike.bst
 A level-1 auxiliary file: tail/cv.aux
 Database file #1: tail/bibliography.bib
-You've used 98 entries,
+You've used 97 entries,
             1935 wiz_defined-function locations,
-            977 strings with 34114 characters,
-and the built_in function-call counts, 60857 in all, are:
-= -- 5362
-> -- 4853
-< -- 22
-+ -- 1849
-- -- 1826
-* -- 6392
-:= -- 10920
-add.period$ -- 293
-call.type$ -- 98
-change.case$ -- 1403
-chr.to.int$ -- 97
-cite$ -- 98
-duplicate$ -- 1378
-empty$ -- 2901
-format.name$ -- 1954
-if$ -- 11077
+            973 strings with 33986 characters,
+and the built_in function-call counts, 60621 in all, are:
+= -- 5338
+> -- 4846
+< -- 21
++ -- 1847
+- -- 1824
+* -- 6377
+:= -- 10873
+add.period$ -- 291
+call.type$ -- 97
+change.case$ -- 1397
+chr.to.int$ -- 96
+cite$ -- 97
+duplicate$ -- 1368
+empty$ -- 2886
+format.name$ -- 1951
+if$ -- 11034
 int.to.chr$ -- 2
 int.to.str$ -- 0
 missing$ -- 98
-newline$ -- 492
-num.names$ -- 294
-pop$ -- 1197
+newline$ -- 488
+num.names$ -- 291
+pop$ -- 1193
 preamble$ -- 1
-purify$ -- 1403
+purify$ -- 1397
 quote$ -- 0
-skip$ -- 1016
+skip$ -- 1007
 stack$ -- 0
-substring$ -- 3497
+substring$ -- 3485
 swap$ -- 110
 text.length$ -- 1
 text.prefix$ -- 0
 top$ -- 0
-type$ -- 586
+type$ -- 580
 warning$ -- 0
-while$ -- 365
+while$ -- 363
 width$ -- 0
-write$ -- 1272
+write$ -- 1262
diff --git a/my_thesis.log b/my_thesis.log
index b2da094..0fa69d6 100644
--- a/my_thesis.log
+++ b/my_thesis.log
@@ -1,2791 +1,2790 @@
-This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12)  19 NOV 2019 15:22
+This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12)  19 NOV 2019 18:25
 entering extended mode
  restricted \write18 enabled.
  %&-line parsing enabled.
 **my_thesis.tex
 (./my_thesis.tex
 LaTeX2e <2017-04-15>
 Babel <3.18> and hyphenation patterns for 84 language(s) loaded.
 (./head/settings_epfl_template.tex
 (/usr/share/texlive/texmf-dist/tex/latex/base/book.cls
 Document Class: book 2014/09/29 v1.4h Standard LaTeX document class
 (/usr/share/texlive/texmf-dist/tex/latex/base/bk11.clo
 File: bk11.clo 2014/09/29 v1.4h Standard LaTeX file (size option)
 )
 \c@part=\count79
 \c@chapter=\count80
 \c@section=\count81
 \c@subsection=\count82
 \c@subsubsection=\count83
 \c@paragraph=\count84
 \c@subparagraph=\count85
 \c@figure=\count86
 \c@table=\count87
 \abovecaptionskip=\skip41
 \belowcaptionskip=\skip42
 \bibindent=\dimen102
 )
 (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty
 Package: fontenc 2017/04/05 v2.0i Standard LaTeX package
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def
 File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file
 LaTeX Font Info:    Redeclaring font encoding T1 on input line 48.
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty
 Package: inputenc 2015/03/17 v1.2c Input encoding file
 \inpenc@prehook=\toks14
 \inpenc@posthook=\toks15
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def
 File: utf8.def 2017/01/28 v1.1t UTF-8 support for inputenc
 Now handling font encoding OML ...
 ... no UTF-8 mapping file for font encoding OML
 Now handling font encoding T1 ...
 ... processing UTF-8 mapping file for font encoding T1
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu
 File: t1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc
    defining Unicode char U+00A0 (decimal 160)
    defining Unicode char U+00A1 (decimal 161)
    defining Unicode char U+00A3 (decimal 163)
    defining Unicode char U+00AB (decimal 171)
    defining Unicode char U+00AD (decimal 173)
    defining Unicode char U+00BB (decimal 187)
    defining Unicode char U+00BF (decimal 191)
    defining Unicode char U+00C0 (decimal 192)
    defining Unicode char U+00C1 (decimal 193)
    defining Unicode char U+00C2 (decimal 194)
    defining Unicode char U+00C3 (decimal 195)
    defining Unicode char U+00C4 (decimal 196)
    defining Unicode char U+00C5 (decimal 197)
    defining Unicode char U+00C6 (decimal 198)
    defining Unicode char U+00C7 (decimal 199)
    defining Unicode char U+00C8 (decimal 200)
    defining Unicode char U+00C9 (decimal 201)
    defining Unicode char U+00CA (decimal 202)
    defining Unicode char U+00CB (decimal 203)
    defining Unicode char U+00CC (decimal 204)
    defining Unicode char U+00CD (decimal 205)
    defining Unicode char U+00CE (decimal 206)
    defining Unicode char U+00CF (decimal 207)
    defining Unicode char U+00D0 (decimal 208)
    defining Unicode char U+00D1 (decimal 209)
    defining Unicode char U+00D2 (decimal 210)
    defining Unicode char U+00D3 (decimal 211)
    defining Unicode char U+00D4 (decimal 212)
    defining Unicode char U+00D5 (decimal 213)
    defining Unicode char U+00D6 (decimal 214)
    defining Unicode char U+00D8 (decimal 216)
    defining Unicode char U+00D9 (decimal 217)
    defining Unicode char U+00DA (decimal 218)
    defining Unicode char U+00DB (decimal 219)
    defining Unicode char U+00DC (decimal 220)
    defining Unicode char U+00DD (decimal 221)
    defining Unicode char U+00DE (decimal 222)
    defining Unicode char U+00DF (decimal 223)
    defining Unicode char U+00E0 (decimal 224)
    defining Unicode char U+00E1 (decimal 225)
    defining Unicode char U+00E2 (decimal 226)
    defining Unicode char U+00E3 (decimal 227)
    defining Unicode char U+00E4 (decimal 228)
    defining Unicode char U+00E5 (decimal 229)
    defining Unicode char U+00E6 (decimal 230)
    defining Unicode char U+00E7 (decimal 231)
    defining Unicode char U+00E8 (decimal 232)
    defining Unicode char U+00E9 (decimal 233)
    defining Unicode char U+00EA (decimal 234)
    defining Unicode char U+00EB (decimal 235)
    defining Unicode char U+00EC (decimal 236)
    defining Unicode char U+00ED (decimal 237)
    defining Unicode char U+00EE (decimal 238)
    defining Unicode char U+00EF (decimal 239)
    defining Unicode char U+00F0 (decimal 240)
    defining Unicode char U+00F1 (decimal 241)
    defining Unicode char U+00F2 (decimal 242)
    defining Unicode char U+00F3 (decimal 243)
    defining Unicode char U+00F4 (decimal 244)
    defining Unicode char U+00F5 (decimal 245)
    defining Unicode char U+00F6 (decimal 246)
    defining Unicode char U+00F8 (decimal 248)
    defining Unicode char U+00F9 (decimal 249)
    defining Unicode char U+00FA (decimal 250)
    defining Unicode char U+00FB (decimal 251)
    defining Unicode char U+00FC (decimal 252)
    defining Unicode char U+00FD (decimal 253)
    defining Unicode char U+00FE (decimal 254)
    defining Unicode char U+00FF (decimal 255)
    defining Unicode char U+0100 (decimal 256)
    defining Unicode char U+0101 (decimal 257)
    defining Unicode char U+0102 (decimal 258)
    defining Unicode char U+0103 (decimal 259)
    defining Unicode char U+0104 (decimal 260)
    defining Unicode char U+0105 (decimal 261)
    defining Unicode char U+0106 (decimal 262)
    defining Unicode char U+0107 (decimal 263)
    defining Unicode char U+0108 (decimal 264)
    defining Unicode char U+0109 (decimal 265)
    defining Unicode char U+010A (decimal 266)
    defining Unicode char U+010B (decimal 267)
    defining Unicode char U+010C (decimal 268)
    defining Unicode char U+010D (decimal 269)
    defining Unicode char U+010E (decimal 270)
    defining Unicode char U+010F (decimal 271)
    defining Unicode char U+0110 (decimal 272)
    defining Unicode char U+0111 (decimal 273)
    defining Unicode char U+0112 (decimal 274)
    defining Unicode char U+0113 (decimal 275)
    defining Unicode char U+0114 (decimal 276)
    defining Unicode char U+0115 (decimal 277)
    defining Unicode char U+0116 (decimal 278)
    defining Unicode char U+0117 (decimal 279)
    defining Unicode char U+0118 (decimal 280)
    defining Unicode char U+0119 (decimal 281)
    defining Unicode char U+011A (decimal 282)
    defining Unicode char U+011B (decimal 283)
    defining Unicode char U+011C (decimal 284)
    defining Unicode char U+011D (decimal 285)
    defining Unicode char U+011E (decimal 286)
    defining Unicode char U+011F (decimal 287)
    defining Unicode char U+0120 (decimal 288)
    defining Unicode char U+0121 (decimal 289)
    defining Unicode char U+0122 (decimal 290)
    defining Unicode char U+0123 (decimal 291)
    defining Unicode char U+0124 (decimal 292)
    defining Unicode char U+0125 (decimal 293)
    defining Unicode char U+0128 (decimal 296)
    defining Unicode char U+0129 (decimal 297)
    defining Unicode char U+012A (decimal 298)
    defining Unicode char U+012B (decimal 299)
    defining Unicode char U+012C (decimal 300)
    defining Unicode char U+012D (decimal 301)
    defining Unicode char U+012E (decimal 302)
    defining Unicode char U+012F (decimal 303)
    defining Unicode char U+0130 (decimal 304)
    defining Unicode char U+0131 (decimal 305)
    defining Unicode char U+0132 (decimal 306)
    defining Unicode char U+0133 (decimal 307)
    defining Unicode char U+0134 (decimal 308)
    defining Unicode char U+0135 (decimal 309)
    defining Unicode char U+0136 (decimal 310)
    defining Unicode char U+0137 (decimal 311)
    defining Unicode char U+0139 (decimal 313)
    defining Unicode char U+013A (decimal 314)
    defining Unicode char U+013B (decimal 315)
    defining Unicode char U+013C (decimal 316)
    defining Unicode char U+013D (decimal 317)
    defining Unicode char U+013E (decimal 318)
    defining Unicode char U+0141 (decimal 321)
    defining Unicode char U+0142 (decimal 322)
    defining Unicode char U+0143 (decimal 323)
    defining Unicode char U+0144 (decimal 324)
    defining Unicode char U+0145 (decimal 325)
    defining Unicode char U+0146 (decimal 326)
    defining Unicode char U+0147 (decimal 327)
    defining Unicode char U+0148 (decimal 328)
    defining Unicode char U+014A (decimal 330)
    defining Unicode char U+014B (decimal 331)
    defining Unicode char U+014C (decimal 332)
    defining Unicode char U+014D (decimal 333)
    defining Unicode char U+014E (decimal 334)
    defining Unicode char U+014F (decimal 335)
    defining Unicode char U+0150 (decimal 336)
    defining Unicode char U+0151 (decimal 337)
    defining Unicode char U+0152 (decimal 338)
    defining Unicode char U+0153 (decimal 339)
    defining Unicode char U+0154 (decimal 340)
    defining Unicode char U+0155 (decimal 341)
    defining Unicode char U+0156 (decimal 342)
    defining Unicode char U+0157 (decimal 343)
    defining Unicode char U+0158 (decimal 344)
    defining Unicode char U+0159 (decimal 345)
    defining Unicode char U+015A (decimal 346)
    defining Unicode char U+015B (decimal 347)
    defining Unicode char U+015C (decimal 348)
    defining Unicode char U+015D (decimal 349)
    defining Unicode char U+015E (decimal 350)
    defining Unicode char U+015F (decimal 351)
    defining Unicode char U+0160 (decimal 352)
    defining Unicode char U+0161 (decimal 353)
    defining Unicode char U+0162 (decimal 354)
    defining Unicode char U+0163 (decimal 355)
    defining Unicode char U+0164 (decimal 356)
    defining Unicode char U+0165 (decimal 357)
    defining Unicode char U+0168 (decimal 360)
    defining Unicode char U+0169 (decimal 361)
    defining Unicode char U+016A (decimal 362)
    defining Unicode char U+016B (decimal 363)
    defining Unicode char U+016C (decimal 364)
    defining Unicode char U+016D (decimal 365)
    defining Unicode char U+016E (decimal 366)
    defining Unicode char U+016F (decimal 367)
    defining Unicode char U+0170 (decimal 368)
    defining Unicode char U+0171 (decimal 369)
    defining Unicode char U+0172 (decimal 370)
    defining Unicode char U+0173 (decimal 371)
    defining Unicode char U+0174 (decimal 372)
    defining Unicode char U+0175 (decimal 373)
    defining Unicode char U+0176 (decimal 374)
    defining Unicode char U+0177 (decimal 375)
    defining Unicode char U+0178 (decimal 376)
    defining Unicode char U+0179 (decimal 377)
    defining Unicode char U+017A (decimal 378)
    defining Unicode char U+017B (decimal 379)
    defining Unicode char U+017C (decimal 380)
    defining Unicode char U+017D (decimal 381)
    defining Unicode char U+017E (decimal 382)
    defining Unicode char U+01CD (decimal 461)
    defining Unicode char U+01CE (decimal 462)
    defining Unicode char U+01CF (decimal 463)
    defining Unicode char U+01D0 (decimal 464)
    defining Unicode char U+01D1 (decimal 465)
    defining Unicode char U+01D2 (decimal 466)
    defining Unicode char U+01D3 (decimal 467)
    defining Unicode char U+01D4 (decimal 468)
    defining Unicode char U+01E2 (decimal 482)
    defining Unicode char U+01E3 (decimal 483)
    defining Unicode char U+01E6 (decimal 486)
    defining Unicode char U+01E7 (decimal 487)
    defining Unicode char U+01E8 (decimal 488)
    defining Unicode char U+01E9 (decimal 489)
    defining Unicode char U+01EA (decimal 490)
    defining Unicode char U+01EB (decimal 491)
    defining Unicode char U+01F0 (decimal 496)
    defining Unicode char U+01F4 (decimal 500)
    defining Unicode char U+01F5 (decimal 501)
    defining Unicode char U+0218 (decimal 536)
    defining Unicode char U+0219 (decimal 537)
    defining Unicode char U+021A (decimal 538)
    defining Unicode char U+021B (decimal 539)
    defining Unicode char U+0232 (decimal 562)
    defining Unicode char U+0233 (decimal 563)
    defining Unicode char U+1E02 (decimal 7682)
    defining Unicode char U+1E03 (decimal 7683)
    defining Unicode char U+200C (decimal 8204)
    defining Unicode char U+2010 (decimal 8208)
    defining Unicode char U+2011 (decimal 8209)
    defining Unicode char U+2012 (decimal 8210)
    defining Unicode char U+2013 (decimal 8211)
    defining Unicode char U+2014 (decimal 8212)
    defining Unicode char U+2015 (decimal 8213)
    defining Unicode char U+2018 (decimal 8216)
    defining Unicode char U+2019 (decimal 8217)
    defining Unicode char U+201A (decimal 8218)
    defining Unicode char U+201C (decimal 8220)
    defining Unicode char U+201D (decimal 8221)
    defining Unicode char U+201E (decimal 8222)
    defining Unicode char U+2030 (decimal 8240)
    defining Unicode char U+2031 (decimal 8241)
    defining Unicode char U+2039 (decimal 8249)
    defining Unicode char U+203A (decimal 8250)
    defining Unicode char U+2423 (decimal 9251)
    defining Unicode char U+1E20 (decimal 7712)
    defining Unicode char U+1E21 (decimal 7713)
 )
 Now handling font encoding OT1 ...
 ... processing UTF-8 mapping file for font encoding OT1
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu
 File: ot1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc
    defining Unicode char U+00A0 (decimal 160)
    defining Unicode char U+00A1 (decimal 161)
    defining Unicode char U+00A3 (decimal 163)
    defining Unicode char U+00AD (decimal 173)
    defining Unicode char U+00B8 (decimal 184)
    defining Unicode char U+00BF (decimal 191)
    defining Unicode char U+00C5 (decimal 197)
    defining Unicode char U+00C6 (decimal 198)
    defining Unicode char U+00D8 (decimal 216)
    defining Unicode char U+00DF (decimal 223)
    defining Unicode char U+00E6 (decimal 230)
    defining Unicode char U+00EC (decimal 236)
    defining Unicode char U+00ED (decimal 237)
    defining Unicode char U+00EE (decimal 238)
    defining Unicode char U+00EF (decimal 239)
    defining Unicode char U+00F8 (decimal 248)
    defining Unicode char U+0131 (decimal 305)
    defining Unicode char U+0141 (decimal 321)
    defining Unicode char U+0142 (decimal 322)
    defining Unicode char U+0152 (decimal 338)
    defining Unicode char U+0153 (decimal 339)
    defining Unicode char U+0174 (decimal 372)
    defining Unicode char U+0175 (decimal 373)
    defining Unicode char U+0176 (decimal 374)
    defining Unicode char U+0177 (decimal 375)
    defining Unicode char U+0218 (decimal 536)
    defining Unicode char U+0219 (decimal 537)
    defining Unicode char U+021A (decimal 538)
    defining Unicode char U+021B (decimal 539)
    defining Unicode char U+2013 (decimal 8211)
    defining Unicode char U+2014 (decimal 8212)
    defining Unicode char U+2018 (decimal 8216)
    defining Unicode char U+2019 (decimal 8217)
    defining Unicode char U+201C (decimal 8220)
    defining Unicode char U+201D (decimal 8221)
 )
 Now handling font encoding OMS ...
 ... processing UTF-8 mapping file for font encoding OMS
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu
 File: omsenc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc
    defining Unicode char U+00A7 (decimal 167)
    defining Unicode char U+00B6 (decimal 182)
    defining Unicode char U+00B7 (decimal 183)
    defining Unicode char U+2020 (decimal 8224)
    defining Unicode char U+2021 (decimal 8225)
    defining Unicode char U+2022 (decimal 8226)
 )
 Now handling font encoding OMX ...
 ... no UTF-8 mapping file for font encoding OMX
 Now handling font encoding U ...
 ... no UTF-8 mapping file for font encoding U
    defining Unicode char U+00A9 (decimal 169)
    defining Unicode char U+00AA (decimal 170)
    defining Unicode char U+00AE (decimal 174)
    defining Unicode char U+00BA (decimal 186)
    defining Unicode char U+02C6 (decimal 710)
    defining Unicode char U+02DC (decimal 732)
    defining Unicode char U+200C (decimal 8204)
    defining Unicode char U+2026 (decimal 8230)
    defining Unicode char U+2122 (decimal 8482)
    defining Unicode char U+2423 (decimal 9251)
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/natbib/natbib.sty
 Package: natbib 2010/09/13 8.31b (PWD, AO)
 \bibhang=\skip43
 \bibsep=\skip44
 LaTeX Info: Redefining \cite on input line 694.
 \c@NAT@ctr=\count88
 )
 (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.sty
 Package: babel 2018/02/14 3.18 The Babel package
 
 (/usr/share/texlive/texmf-dist/tex/generic/babel/switch.def
 File: switch.def 2018/02/14 3.18 Babel switching mechanism
 )
 (/usr/share/texlive/texmf-dist/tex/generic/babel-french/french.ldf
 Language: french 2018/02/04 v3.4b French support from the babel system
 
 (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.def
 File: babel.def 2018/02/14 3.18 Babel common definitions
 \babel@savecnt=\count89
 \U@D=\dimen103
 
 (/usr/share/texlive/texmf-dist/tex/generic/babel/txtbabel.def)
 \bbl@dirlevel=\count90
 )
 \l@acadian = a dialect from \language\l@french 
 \FB@nonchar=\count91
 Package babel Info: Making : an active character on input line 411.
 Package babel Info: Making ; an active character on input line 412.
 Package babel Info: Making ! an active character on input line 413.
 Package babel Info: Making ? an active character on input line 414.
 \FBguill@level=\count92
 \FB@everypar=\toks16
 \FB@Mht=\dimen104
 \mc@charclass=\count93
 \mc@charfam=\count94
 \mc@charslot=\count95
 \std@mcc=\count96
 \dec@mcc=\count97
 \c@FBcaption@count=\count98
 \listindentFB=\skip45
 \descindentFB=\skip46
 \labelwidthFB=\skip47
 \leftmarginFB=\skip48
 \parindentFFN=\dimen105
 \FBfnindent=\skip49
 )
 (/usr/share/texlive/texmf-dist/tex/generic/babel-german/german.ldf
 Language: german 2016/11/02 v2.9 German support for babel (traditional orthogra
 phy)
 
 (/usr/share/texlive/texmf-dist/tex/generic/babel-german/germanb.ldf
 Language: germanb 2016/11/02 v2.9 German support for babel (traditional orthogr
 aphy)
 Package babel Info: Making " an active character on input line 139.
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/babel-english/english.ldf
 Language: english 2017/06/06 v3.3r English support from the babel system
 \l@canadian = a dialect from \language\l@american 
 \l@australian = a dialect from \language\l@british 
 \l@newzealand = a dialect from \language\l@british 
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/carlisle/scalefnt.sty)
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty
 Package: keyval 2014/10/28 v1.15 key=value parser (DPC)
 \KV@toks@=\toks17
 )
 (/usr/share/texmf/tex/latex/lm/lmodern.sty
 Package: lmodern 2009/10/30 v1.6 Latin Modern Fonts
 LaTeX Font Info:    Overwriting symbol font `operators' in version `normal'
 (Font)                  OT1/cmr/m/n --> OT1/lmr/m/n on input line 22.
 LaTeX Font Info:    Overwriting symbol font `letters' in version `normal'
 (Font)                  OML/cmm/m/it --> OML/lmm/m/it on input line 23.
 LaTeX Font Info:    Overwriting symbol font `symbols' in version `normal'
 (Font)                  OMS/cmsy/m/n --> OMS/lmsy/m/n on input line 24.
 LaTeX Font Info:    Overwriting symbol font `largesymbols' in version `normal'
 (Font)                  OMX/cmex/m/n --> OMX/lmex/m/n on input line 25.
 LaTeX Font Info:    Overwriting symbol font `operators' in version `bold'
 (Font)                  OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 26.
 LaTeX Font Info:    Overwriting symbol font `letters' in version `bold'
 (Font)                  OML/cmm/b/it --> OML/lmm/b/it on input line 27.
 LaTeX Font Info:    Overwriting symbol font `symbols' in version `bold'
 (Font)                  OMS/cmsy/b/n --> OMS/lmsy/b/n on input line 28.
 LaTeX Font Info:    Overwriting symbol font `largesymbols' in version `bold'
 (Font)                  OMX/cmex/m/n --> OMX/lmex/m/n on input line 29.
 LaTeX Font Info:    Overwriting math alphabet `\mathbf' in version `normal'
 (Font)                  OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 31.
 LaTeX Font Info:    Overwriting math alphabet `\mathsf' in version `normal'
 (Font)                  OT1/cmss/m/n --> OT1/lmss/m/n on input line 32.
 LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `normal'
 (Font)                  OT1/cmr/m/it --> OT1/lmr/m/it on input line 33.
 LaTeX Font Info:    Overwriting math alphabet `\mathtt' in version `normal'
 (Font)                  OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 34.
 LaTeX Font Info:    Overwriting math alphabet `\mathbf' in version `bold'
 (Font)                  OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 35.
 LaTeX Font Info:    Overwriting math alphabet `\mathsf' in version `bold'
 (Font)                  OT1/cmss/bx/n --> OT1/lmss/bx/n on input line 36.
 LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `bold'
 (Font)                  OT1/cmr/bx/it --> OT1/lmr/bx/it on input line 37.
 LaTeX Font Info:    Overwriting math alphabet `\mathtt' in version `bold'
 (Font)                  OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 38.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier.sty
 Package: fourier 2005/01/01 1.4 fourier-GUTenberg package
 Now handling font encoding FML ...
 ... no UTF-8 mapping file for font encoding FML
 Now handling font encoding FMS ...
 ... no UTF-8 mapping file for font encoding FMS
 Now handling font encoding FMX ...
 ... no UTF-8 mapping file for font encoding FMX
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty
 Package: fontenc 2017/04/05 v2.0i Standard LaTeX package
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def
 File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file
 LaTeX Font Info:    Redeclaring font encoding T1 on input line 48.
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty
 Package: textcomp 2017/04/05 v2.0i Standard LaTeX package
 Package textcomp Info: Sub-encoding information:
 (textcomp)               5 = only ISO-Adobe without \textcurrency
 (textcomp)               4 = 5 + \texteuro
 (textcomp)               3 = 4 + \textohm
 (textcomp)               2 = 3 + \textestimated + \textcurrency
 (textcomp)               1 = TS1 - \textcircled - \t
 (textcomp)               0 = TS1 (full)
 (textcomp)             Font families with sub-encoding setting implement
 (textcomp)             only a restricted character set as indicated.
 (textcomp)             Family '?' is the default used for unknown fonts.
 (textcomp)             See the documentation for details.
 Package textcomp Info: Setting ? sub-encoding to TS1/1 on input line 79.
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def
 File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file
 Now handling font encoding TS1 ...
 ... processing UTF-8 mapping file for font encoding TS1
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu
 File: ts1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc
    defining Unicode char U+00A2 (decimal 162)
    defining Unicode char U+00A3 (decimal 163)
    defining Unicode char U+00A4 (decimal 164)
    defining Unicode char U+00A5 (decimal 165)
    defining Unicode char U+00A6 (decimal 166)
    defining Unicode char U+00A7 (decimal 167)
    defining Unicode char U+00A8 (decimal 168)
    defining Unicode char U+00A9 (decimal 169)
    defining Unicode char U+00AA (decimal 170)
    defining Unicode char U+00AC (decimal 172)
    defining Unicode char U+00AE (decimal 174)
    defining Unicode char U+00AF (decimal 175)
    defining Unicode char U+00B0 (decimal 176)
    defining Unicode char U+00B1 (decimal 177)
    defining Unicode char U+00B2 (decimal 178)
    defining Unicode char U+00B3 (decimal 179)
    defining Unicode char U+00B4 (decimal 180)
    defining Unicode char U+00B5 (decimal 181)
    defining Unicode char U+00B6 (decimal 182)
    defining Unicode char U+00B7 (decimal 183)
    defining Unicode char U+00B9 (decimal 185)
    defining Unicode char U+00BA (decimal 186)
    defining Unicode char U+00BC (decimal 188)
    defining Unicode char U+00BD (decimal 189)
    defining Unicode char U+00BE (decimal 190)
    defining Unicode char U+00D7 (decimal 215)
    defining Unicode char U+00F7 (decimal 247)
    defining Unicode char U+0192 (decimal 402)
    defining Unicode char U+02C7 (decimal 711)
    defining Unicode char U+02D8 (decimal 728)
    defining Unicode char U+02DD (decimal 733)
    defining Unicode char U+0E3F (decimal 3647)
    defining Unicode char U+2016 (decimal 8214)
    defining Unicode char U+2020 (decimal 8224)
    defining Unicode char U+2021 (decimal 8225)
    defining Unicode char U+2022 (decimal 8226)
    defining Unicode char U+2030 (decimal 8240)
    defining Unicode char U+2031 (decimal 8241)
    defining Unicode char U+203B (decimal 8251)
    defining Unicode char U+203D (decimal 8253)
    defining Unicode char U+2044 (decimal 8260)
    defining Unicode char U+204E (decimal 8270)
    defining Unicode char U+2052 (decimal 8274)
    defining Unicode char U+20A1 (decimal 8353)
    defining Unicode char U+20A4 (decimal 8356)
    defining Unicode char U+20A6 (decimal 8358)
    defining Unicode char U+20A9 (decimal 8361)
    defining Unicode char U+20AB (decimal 8363)
    defining Unicode char U+20AC (decimal 8364)
    defining Unicode char U+20B1 (decimal 8369)
    defining Unicode char U+2103 (decimal 8451)
    defining Unicode char U+2116 (decimal 8470)
    defining Unicode char U+2117 (decimal 8471)
    defining Unicode char U+211E (decimal 8478)
    defining Unicode char U+2120 (decimal 8480)
    defining Unicode char U+2122 (decimal 8482)
    defining Unicode char U+2126 (decimal 8486)
    defining Unicode char U+2127 (decimal 8487)
    defining Unicode char U+212E (decimal 8494)
    defining Unicode char U+2190 (decimal 8592)
    defining Unicode char U+2191 (decimal 8593)
    defining Unicode char U+2192 (decimal 8594)
    defining Unicode char U+2193 (decimal 8595)
    defining Unicode char U+2329 (decimal 9001)
    defining Unicode char U+232A (decimal 9002)
    defining Unicode char U+2422 (decimal 9250)
    defining Unicode char U+25E6 (decimal 9702)
    defining Unicode char U+25EF (decimal 9711)
    defining Unicode char U+266A (decimal 9834)
 ))
 LaTeX Info: Redefining \oldstylenums on input line 334.
 Package textcomp Info: Setting cmr sub-encoding to TS1/0 on input line 349.
 Package textcomp Info: Setting cmss sub-encoding to TS1/0 on input line 350.
 Package textcomp Info: Setting cmtt sub-encoding to TS1/0 on input line 351.
 Package textcomp Info: Setting cmvtt sub-encoding to TS1/0 on input line 352.
 Package textcomp Info: Setting cmbr sub-encoding to TS1/0 on input line 353.
 Package textcomp Info: Setting cmtl sub-encoding to TS1/0 on input line 354.
 Package textcomp Info: Setting ccr sub-encoding to TS1/0 on input line 355.
 Package textcomp Info: Setting ptm sub-encoding to TS1/4 on input line 356.
 Package textcomp Info: Setting pcr sub-encoding to TS1/4 on input line 357.
 Package textcomp Info: Setting phv sub-encoding to TS1/4 on input line 358.
 Package textcomp Info: Setting ppl sub-encoding to TS1/3 on input line 359.
 Package textcomp Info: Setting pag sub-encoding to TS1/4 on input line 360.
 Package textcomp Info: Setting pbk sub-encoding to TS1/4 on input line 361.
 Package textcomp Info: Setting pnc sub-encoding to TS1/4 on input line 362.
 Package textcomp Info: Setting pzc sub-encoding to TS1/4 on input line 363.
 Package textcomp Info: Setting bch sub-encoding to TS1/4 on input line 364.
 Package textcomp Info: Setting put sub-encoding to TS1/5 on input line 365.
 Package textcomp Info: Setting uag sub-encoding to TS1/5 on input line 366.
 Package textcomp Info: Setting ugq sub-encoding to TS1/5 on input line 367.
 Package textcomp Info: Setting ul8 sub-encoding to TS1/4 on input line 368.
 Package textcomp Info: Setting ul9 sub-encoding to TS1/4 on input line 369.
 Package textcomp Info: Setting augie sub-encoding to TS1/5 on input line 370.
 Package textcomp Info: Setting dayrom sub-encoding to TS1/3 on input line 371.
 Package textcomp Info: Setting dayroms sub-encoding to TS1/3 on input line 372.
 
 Package textcomp Info: Setting pxr sub-encoding to TS1/0 on input line 373.
 Package textcomp Info: Setting pxss sub-encoding to TS1/0 on input line 374.
 Package textcomp Info: Setting pxtt sub-encoding to TS1/0 on input line 375.
 Package textcomp Info: Setting txr sub-encoding to TS1/0 on input line 376.
 Package textcomp Info: Setting txss sub-encoding to TS1/0 on input line 377.
 Package textcomp Info: Setting txtt sub-encoding to TS1/0 on input line 378.
 Package textcomp Info: Setting lmr sub-encoding to TS1/0 on input line 379.
 Package textcomp Info: Setting lmdh sub-encoding to TS1/0 on input line 380.
 Package textcomp Info: Setting lmss sub-encoding to TS1/0 on input line 381.
 Package textcomp Info: Setting lmssq sub-encoding to TS1/0 on input line 382.
 Package textcomp Info: Setting lmvtt sub-encoding to TS1/0 on input line 383.
 Package textcomp Info: Setting lmtt sub-encoding to TS1/0 on input line 384.
 Package textcomp Info: Setting qhv sub-encoding to TS1/0 on input line 385.
 Package textcomp Info: Setting qag sub-encoding to TS1/0 on input line 386.
 Package textcomp Info: Setting qbk sub-encoding to TS1/0 on input line 387.
 Package textcomp Info: Setting qcr sub-encoding to TS1/0 on input line 388.
 Package textcomp Info: Setting qcs sub-encoding to TS1/0 on input line 389.
 Package textcomp Info: Setting qpl sub-encoding to TS1/0 on input line 390.
 Package textcomp Info: Setting qtm sub-encoding to TS1/0 on input line 391.
 Package textcomp Info: Setting qzc sub-encoding to TS1/0 on input line 392.
 Package textcomp Info: Setting qhvc sub-encoding to TS1/0 on input line 393.
 Package textcomp Info: Setting futs sub-encoding to TS1/4 on input line 394.
 Package textcomp Info: Setting futx sub-encoding to TS1/4 on input line 395.
 Package textcomp Info: Setting futj sub-encoding to TS1/4 on input line 396.
 Package textcomp Info: Setting hlh sub-encoding to TS1/3 on input line 397.
 Package textcomp Info: Setting hls sub-encoding to TS1/3 on input line 398.
 Package textcomp Info: Setting hlst sub-encoding to TS1/3 on input line 399.
 Package textcomp Info: Setting hlct sub-encoding to TS1/5 on input line 400.
 Package textcomp Info: Setting hlx sub-encoding to TS1/5 on input line 401.
 Package textcomp Info: Setting hlce sub-encoding to TS1/5 on input line 402.
 Package textcomp Info: Setting hlcn sub-encoding to TS1/5 on input line 403.
 Package textcomp Info: Setting hlcw sub-encoding to TS1/5 on input line 404.
 Package textcomp Info: Setting hlcf sub-encoding to TS1/5 on input line 405.
 Package textcomp Info: Setting pplx sub-encoding to TS1/3 on input line 406.
 Package textcomp Info: Setting pplj sub-encoding to TS1/3 on input line 407.
 Package textcomp Info: Setting ptmx sub-encoding to TS1/4 on input line 408.
 Package textcomp Info: Setting ptmj sub-encoding to TS1/4 on input line 409.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier-orns.sty
 Package: fourier-orns 2004/01/30 1.1 fourier-ornaments package
 )
 LaTeX Font Info:    Redeclaring symbol font `operators' on input line 50.
 LaTeX Font Info:    Encoding `OT1' has changed to `T1' for symbol font
 (Font)              `operators' in the math version `normal' on input line 50.
 LaTeX Font Info:    Overwriting symbol font `operators' in version `normal'
 (Font)                  OT1/lmr/m/n --> T1/futs/m/n on input line 50.
 LaTeX Font Info:    Encoding `OT1' has changed to `T1' for symbol font
 (Font)              `operators' in the math version `bold' on input line 50.
 LaTeX Font Info:    Overwriting symbol font `operators' in version `bold'
 (Font)                  OT1/lmr/bx/n --> T1/futs/m/n on input line 50.
 LaTeX Font Info:    Overwriting symbol font `operators' in version `bold'
 (Font)                  T1/futs/m/n --> T1/futs/b/n on input line 51.
 LaTeX Font Info:    Redeclaring symbol font `letters' on input line 59.
 LaTeX Font Info:    Encoding `OML' has changed to `FML' for symbol font
 (Font)              `letters' in the math version `normal' on input line 59.
 LaTeX Font Info:    Overwriting symbol font `letters' in version `normal'
 (Font)                  OML/lmm/m/it --> FML/futmi/m/it on input line 59.
 LaTeX Font Info:    Encoding `OML' has changed to `FML' for symbol font
 (Font)              `letters' in the math version `bold' on input line 59.
 LaTeX Font Info:    Overwriting symbol font `letters' in version `bold'
 (Font)                  OML/lmm/b/it --> FML/futmi/m/it on input line 59.
 \symotherletters=\mathgroup4
 LaTeX Font Info:    Overwriting symbol font `letters' in version `bold'
 (Font)                  FML/futmi/m/it --> FML/futmi/b/it on input line 61.
 LaTeX Font Info:    Overwriting symbol font `otherletters' in version `bold'
 (Font)                  FML/futm/m/it --> FML/futm/b/it on input line 62.
 LaTeX Font Info:    Redeclaring math symbol \Gamma on input line 63.
 LaTeX Font Info:    Redeclaring math symbol \Delta on input line 64.
 LaTeX Font Info:    Redeclaring math symbol \Theta on input line 65.
 LaTeX Font Info:    Redeclaring math symbol \Lambda on input line 66.
 LaTeX Font Info:    Redeclaring math symbol \Xi on input line 67.
 LaTeX Font Info:    Redeclaring math symbol \Pi on input line 68.
 LaTeX Font Info:    Redeclaring math symbol \Sigma on input line 69.
 LaTeX Font Info:    Redeclaring math symbol \Upsilon on input line 70.
 LaTeX Font Info:    Redeclaring math symbol \Phi on input line 71.
 LaTeX Font Info:    Redeclaring math symbol \Psi on input line 72.
 LaTeX Font Info:    Redeclaring math symbol \Omega on input line 73.
 LaTeX Font Info:    Redeclaring symbol font `symbols' on input line 113.
 LaTeX Font Info:    Encoding `OMS' has changed to `FMS' for symbol font
 (Font)              `symbols' in the math version `normal' on input line 113.
 LaTeX Font Info:    Overwriting symbol font `symbols' in version `normal'
 (Font)                  OMS/lmsy/m/n --> FMS/futm/m/n on input line 113.
 LaTeX Font Info:    Encoding `OMS' has changed to `FMS' for symbol font
 (Font)              `symbols' in the math version `bold' on input line 113.
 LaTeX Font Info:    Overwriting symbol font `symbols' in version `bold'
 (Font)                  OMS/lmsy/b/n --> FMS/futm/m/n on input line 113.
 LaTeX Font Info:    Redeclaring symbol font `largesymbols' on input line 114.
 LaTeX Font Info:    Encoding `OMX' has changed to `FMX' for symbol font
 (Font)              `largesymbols' in the math version `normal' on input line 1
 14.
 LaTeX Font Info:    Overwriting symbol font `largesymbols' in version `normal'
 (Font)                  OMX/lmex/m/n --> FMX/futm/m/n on input line 114.
 LaTeX Font Info:    Encoding `OMX' has changed to `FMX' for symbol font
 (Font)              `largesymbols' in the math version `bold' on input line 114
 .
 LaTeX Font Info:    Overwriting symbol font `largesymbols' in version `bold'
 (Font)                  OMX/lmex/m/n --> FMX/futm/m/n on input line 114.
 LaTeX Font Info:    Redeclaring math alphabet \mathbf on input line 115.
 LaTeX Font Info:    Overwriting math alphabet `\mathbf' in version `normal'
 (Font)                  OT1/lmr/bx/n --> T1/futs/bx/n on input line 115.
 LaTeX Font Info:    Overwriting math alphabet `\mathbf' in version `bold'
 (Font)                  OT1/lmr/bx/n --> T1/futs/bx/n on input line 115.
 LaTeX Font Info:    Redeclaring math alphabet \mathrm on input line 116.
 LaTeX Font Info:    Redeclaring math alphabet \mathit on input line 117.
 LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `normal'
 (Font)                  OT1/lmr/m/it --> T1/futs/m/it on input line 117.
 LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `bold'
 (Font)                  OT1/lmr/bx/it --> T1/futs/m/it on input line 117.
 LaTeX Font Info:    Redeclaring math alphabet \mathcal on input line 118.
 LaTeX Font Info:    Redeclaring math symbol \parallel on input line 134.
 LaTeX Font Info:    Redeclaring math symbol \hbar on input line 148.
 LaTeX Font Info:    Redeclaring math symbol \varkappa on input line 186.
 LaTeX Font Info:    Redeclaring math symbol \varvarrho on input line 187.
 LaTeX Font Info:    Redeclaring math delimiter \Vert on input line 210.
 LaTeX Font Info:    Redeclaring math delimiter \vert on input line 215.
 LaTeX Font Info:    Redeclaring math delimiter \Downarrow on input line 225.
 LaTeX Font Info:    Redeclaring math delimiter \backslash on input line 227.
 LaTeX Font Info:    Redeclaring math delimiter \rangle on input line 229.
 LaTeX Font Info:    Redeclaring math delimiter \langle on input line 231.
 LaTeX Font Info:    Redeclaring math delimiter \rbrace on input line 233.
 LaTeX Font Info:    Redeclaring math delimiter \lbrace on input line 235.
 LaTeX Font Info:    Redeclaring math delimiter \rceil on input line 237.
 LaTeX Font Info:    Redeclaring math delimiter \lceil on input line 239.
 LaTeX Font Info:    Redeclaring math delimiter \rfloor on input line 241.
 LaTeX Font Info:    Redeclaring math delimiter \lfloor on input line 243.
 LaTeX Font Info:    Redeclaring math accent \acute on input line 247.
 LaTeX Font Info:    Redeclaring math accent \grave on input line 248.
 LaTeX Font Info:    Redeclaring math accent \ddot on input line 249.
 LaTeX Font Info:    Redeclaring math accent \tilde on input line 250.
 LaTeX Font Info:    Redeclaring math accent \bar on input line 251.
 LaTeX Font Info:    Redeclaring math accent \breve on input line 252.
 LaTeX Font Info:    Redeclaring math accent \check on input line 253.
 LaTeX Font Info:    Redeclaring math accent \hat on input line 254.
 LaTeX Font Info:    Redeclaring math accent \dot on input line 255.
 LaTeX Font Info:    Redeclaring math accent \mathring on input line 256.
 \symUfutm=\mathgroup5
 )
 (/usr/share/texlive/texmf-dist/tex/latex/setspace/setspace.sty
 Package: setspace 2011/12/19 v6.7a set line spacing
 )
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphicx.sty
 Package: graphicx 2017/06/01 v1.1a Enhanced LaTeX Graphics (DPC,SPQR)
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphics.sty
 Package: graphics 2017/06/25 v1.2c Standard LaTeX Graphics (DPC,SPQR)
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/trig.sty
 Package: trig 2016/01/03 v1.10 sin cos tan (DPC)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
 File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
 )
 Package graphics Info: Driver file: pdftex.def on input line 99.
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics-def/pdftex.def
 File: pdftex.def 2018/01/08 v1.0l Graphics/color driver for pdftex
 ))
 \Gin@req@height=\dimen106
 \Gin@req@width=\dimen107
 )
 (/usr/share/texlive/texmf-dist/tex/latex/xcolor/xcolor.sty
 Package: xcolor 2016/05/11 v2.12 LaTeX color extensions (UK)
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/color.cfg
 File: color.cfg 2016/01/02 v1.6 sample color configuration
 )
 Package xcolor Info: Driver file: pdftex.def on input line 225.
 Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1348.
 Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1352.
 Package xcolor Info: Model `RGB' extended on input line 1364.
 Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1366.
 Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1367.
 Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1368.
 Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1369.
 Package xcolor Info: Model `Gray' substituted by `gray' on input line 1370.
 Package xcolor Info: Model `wave' substituted by `hsb' on input line 1371.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/subfig/subfig.sty
 Package: subfig 2005/06/28 ver: 1.3 subfig package
 
 (/usr/share/texlive/texmf-dist/tex/latex/caption/caption.sty
 Package: caption 2016/02/21 v3.3-144 Customizing captions (AR)
 
 (/usr/share/texlive/texmf-dist/tex/latex/caption/caption3.sty
 Package: caption3 2016/05/22 v1.7-166 caption3 kernel (AR)
 Package caption3 Info: TeX engine: e-TeX on input line 67.
 \captionmargin=\dimen108
 \captionmargin@=\dimen109
 \captionwidth=\dimen110
 \caption@tempdima=\dimen111
 \caption@indent=\dimen112
 \caption@parindent=\dimen113
 \caption@hangindent=\dimen114
 )
 \c@ContinuedFloat=\count99
 )
 \c@KVtest=\count100
 \sf@farskip=\skip50
 \sf@captopadj=\dimen115
 \sf@capskip=\skip51
 \sf@nearskip=\skip52
 \c@subfigure=\count101
 \c@subfigure@save=\count102
 \c@lofdepth=\count103
 \c@subtable=\count104
 \c@subtable@save=\count105
 \c@lotdepth=\count106
 \sf@top=\skip53
 \sf@bottom=\skip54
 )
 (/usr/share/texlive/texmf-dist/tex/latex/booktabs/booktabs.sty
 Package: booktabs 2016/04/27 v1.618033 publication quality tables
 \heavyrulewidth=\dimen116
 \lightrulewidth=\dimen117
 \cmidrulewidth=\dimen118
 \belowrulesep=\dimen119
 \belowbottomsep=\dimen120
 \aboverulesep=\dimen121
 \abovetopsep=\dimen122
 \cmidrulesep=\dimen123
 \cmidrulekern=\dimen124
 \defaultaddspace=\dimen125
 \@cmidla=\count107
 \@cmidlb=\count108
 \@aboverulesep=\dimen126
 \@belowrulesep=\dimen127
 \@thisruleclass=\count109
 \@lastruleclass=\count110
 \@thisrulewidth=\dimen128
 )
 (/usr/share/texlive/texmf-dist/tex/latex/lipsum/lipsum.sty
 Package: lipsum 2014/07/27 v1.3 150 paragraphs of Lorem Ipsum dummy text
 \c@lips@count=\count111
 )
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.sty
 Package: microtype 2018/01/14 v2.7a Micro-typographical refinements (RS)
 \MT@toks=\toks18
 \MT@count=\count112
 LaTeX Info: Redefining \textls on input line 793.
 \MT@outer@kern=\dimen129
 LaTeX Info: Redefining \textmicrotypecontext on input line 1339.
 \MT@listname@count=\count113
 
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype-pdftex.def
 File: microtype-pdftex.def 2018/01/14 v2.7a Definitions specific to pdftex (RS)
 
 LaTeX Info: Redefining \lsstyle on input line 913.
 LaTeX Info: Redefining \lslig on input line 913.
 \MT@outer@space=\skip55
 )
 Package microtype Info: Loading configuration file microtype.cfg.
 
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.cfg
 File: microtype.cfg 2018/01/14 v2.7a microtype main configuration file (RS)
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/url/url.sty
 \Urlmuskip=\muskip10
 Package: url 2013/09/16  ver 3.4  Verb mode for urls, etc.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty
 Package: fancyhdr 2017/06/30 v3.9a Extensive control of page headers and footer
 s
 \f@nch@headwidth=\skip56
 \f@nch@O@elh=\skip57
 \f@nch@O@erh=\skip58
 \f@nch@O@olh=\skip59
 \f@nch@O@orh=\skip60
 \f@nch@O@elf=\skip61
 \f@nch@O@erf=\skip62
 \f@nch@O@olf=\skip63
 \f@nch@O@orf=\skip64
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.sty
 \lst@mode=\count114
 \lst@gtempboxa=\box26
 \lst@token=\toks19
 \lst@length=\count115
 \lst@currlwidth=\dimen130
 \lst@column=\count116
 \lst@pos=\count117
 \lst@lostspace=\dimen131
 \lst@width=\dimen132
 \lst@newlines=\count118
 \lst@lineno=\count119
 \lst@maxwidth=\dimen133
 
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty
 File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz)
 \c@lstnumber=\count120
 \lst@skipnumbers=\count121
 \lst@framebox=\box27
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.cfg
 File: listings.cfg 2015/06/04 1.6 listings configuration
 ))
 Package: listings 2015/06/04 1.6 (Carsten Heinz)
 
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty
 File: lstlang1.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty
 File: lstlang2.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty
 File: lstlang3.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty
 File: lstlang1.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty
 File: lstlang2.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty
 File: lstlang3.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty
 File: lstlang1.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty
 File: lstlang2.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty
 File: lstlang3.sty 2015/06/04 1.6 listings language file
 )
 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty
 File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hyperref.sty
 Package: hyperref 2018/02/06 v6.86b Hypertext links for LaTeX
 
 (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-hyperref.sty
 Package: hobsub-hyperref 2016/05/16 v1.14 Bundle oberdiek, subset hyperref (HO)
 
 
 (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-generic.sty
 Package: hobsub-generic 2016/05/16 v1.14 Bundle oberdiek, subset generic (HO)
 Package: hobsub 2016/05/16 v1.14 Construct package bundles (HO)
 Package: infwarerr 2016/05/16 v1.4 Providing info/warning/error messages (HO)
 Package: ltxcmds 2016/05/16 v1.23 LaTeX kernel commands for general use (HO)
 Package: ifluatex 2016/05/16 v1.4 Provides the ifluatex switch (HO)
 Package ifluatex Info: LuaTeX not detected.
 Package: ifvtex 2016/05/16 v1.6 Detect VTeX and its facilities (HO)
 Package ifvtex Info: VTeX not detected.
 Package: intcalc 2016/05/16 v1.2 Expandable calculations with integers (HO)
 Package: ifpdf 2017/03/15 v3.2 Provides the ifpdf switch
 Package: etexcmds 2016/05/16 v1.6 Avoid name clashes with e-TeX commands (HO)
 Package etexcmds Info: Could not find \expanded.
 (etexcmds)             That can mean that you are not using pdfTeX 1.50 or
 (etexcmds)             that some package has redefined \expanded.
 (etexcmds)             In the latter case, load this package earlier.
 Package: kvsetkeys 2016/05/16 v1.17 Key value parser (HO)
 Package: kvdefinekeys 2016/05/16 v1.4 Define keys (HO)
 Package: pdftexcmds 2018/01/21 v0.26 Utility functions of pdfTeX for LuaTeX (HO
 )
 Package pdftexcmds Info: LuaTeX not detected.
 Package pdftexcmds Info: \pdf@primitive is available.
 Package pdftexcmds Info: \pdf@ifprimitive is available.
 Package pdftexcmds Info: \pdfdraftmode found.
 Package: pdfescape 2016/05/16 v1.14 Implements pdfTeX's escape features (HO)
 Package: bigintcalc 2016/05/16 v1.4 Expandable calculations on big integers (HO
 )
 Package: bitset 2016/05/16 v1.2 Handle bit-vector datatype (HO)
 Package: uniquecounter 2016/05/16 v1.3 Provide unlimited unique counter (HO)
 )
 Package hobsub Info: Skipping package `hobsub' (already loaded).
 Package: letltxmacro 2016/05/16 v1.5 Let assignment for LaTeX macros (HO)
 Package: hopatch 2016/05/16 v1.3 Wrapper for package hooks (HO)
 Package: xcolor-patch 2016/05/16 xcolor patch
 Package: atveryend 2016/05/16 v1.9 Hooks at the very end of document (HO)
 Package atveryend Info: \enddocument detected (standard20110627).
 Package: atbegshi 2016/06/09 v1.18 At begin shipout hook (HO)
 Package: refcount 2016/05/16 v3.5 Data extraction from label references (HO)
 Package: hycolor 2016/05/16 v1.8 Color options for hyperref/bookmark (HO)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty
 Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional
 )
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/auxhook.sty
 Package: auxhook 2016/05/16 v1.4 Hooks for auxiliary files (HO)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/kvoptions.sty
 Package: kvoptions 2016/05/16 v3.12 Key value format for package options (HO)
 )
 \@linkdim=\dimen134
 \Hy@linkcounter=\count122
 \Hy@pagecounter=\count123
 
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/pd1enc.def
 File: pd1enc.def 2018/02/06 v6.86b Hyperref: PDFDocEncoding definition (HO)
 Now handling font encoding PD1 ...
 ... no UTF-8 mapping file for font encoding PD1
 )
 \Hy@SavedSpaceFactor=\count124
 
 (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/hyperref.cfg
 File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive
 )
 Package hyperref Info: Hyper figures OFF on input line 4509.
 Package hyperref Info: Link nesting OFF on input line 4514.
 Package hyperref Info: Hyper index ON on input line 4517.
 Package hyperref Info: Plain pages OFF on input line 4524.
 Package hyperref Info: Backreferencing OFF on input line 4529.
 Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
 Package hyperref Info: Bookmarks ON on input line 4762.
 \c@Hy@tempcnt=\count125
 LaTeX Info: Redefining \url on input line 5115.
 \XeTeXLinkMargin=\dimen135
 \Fld@menulength=\count126
 \Field@Width=\dimen136
 \Fld@charsize=\dimen137
 Package hyperref Info: Hyper figures OFF on input line 6369.
 Package hyperref Info: Link nesting OFF on input line 6374.
 Package hyperref Info: Hyper index ON on input line 6377.
 Package hyperref Info: backreferencing OFF on input line 6384.
 Package hyperref Info: Link coloring OFF on input line 6389.
 Package hyperref Info: Link coloring with OCG OFF on input line 6394.
 Package hyperref Info: PDF/A mode OFF on input line 6399.
 LaTeX Info: Redefining \ref on input line 6439.
 LaTeX Info: Redefining \pageref on input line 6443.
 \Hy@abspage=\count127
 \c@Item=\count128
 \c@Hfootnote=\count129
 )
 Package hyperref Info: Driver (autodetected): hpdftex.
 
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hpdftex.def
 File: hpdftex.def 2018/02/06 v6.86b Hyperref driver for pdfTeX
 \Fld@listcount=\count130
 \c@bookmark@seq@number=\count131
 
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty
 Package: rerunfilecheck 2016/05/16 v1.8 Rerun checks for auxiliary files (HO)
 Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2
 82.
 )
 \Hy@SectionHShift=\skip65
 )
 Package hyperref Info: Option `colorlinks' set `true' on input line 105.
 
 (/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pdfpages.sty
 Package: pdfpages 2017/10/31 v0.5l Insert pages of external PDF documents (AM)
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ifthen.sty
 Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/tools/calc.sty
 Package: calc 2014/10/28 v4.3 Infix arithmetic (KKT,FJ)
 \calc@Acount=\count132
 \calc@Bcount=\count133
 \calc@Adimen=\dimen138
 \calc@Bdimen=\dimen139
 \calc@Askip=\skip66
 \calc@Bskip=\skip67
 LaTeX Info: Redefining \setlength on input line 80.
 LaTeX Info: Redefining \addtolength on input line 81.
 \calc@Ccount=\count134
 \calc@Cskip=\skip68
 )
 (/usr/share/texlive/texmf-dist/tex/latex/eso-pic/eso-pic.sty
 Package: eso-pic 2015/07/21 v2.0g eso-pic (RN)
 )
 \AM@pagewidth=\dimen140
 \AM@pageheight=\dimen141
 
 (/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pppdftex.def
 File: pppdftex.def 2017/10/31 v0.5l Pdfpages driver for pdfTeX (AM)
 )
 \AM@pagebox=\box28
 \AM@global@opts=\toks20
 \AM@toc@title=\toks21
 \c@AM@survey=\count135
 \AM@templatesizebox=\box29
 )
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bookmark.sty
 Package: bookmark 2016/05/17 v1.26 PDF bookmarks (HO)
 
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bkm-pdftex.def
 File: bkm-pdftex.def 2016/05/17 v1.26 bookmark driver for pdfTeX (HO)
 \BKM@id=\count136
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex
 \pgfutil@everybye=\toks22
 \pgfutil@tempdima=\dimen142
 \pgfutil@tempdimb=\dimen143
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common-lists.t
 ex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def
 \pgfutil@abb=\box30
 (/usr/share/texlive/texmf-dist/tex/latex/ms/everyshi.sty
 Package: everyshi 2001/05/15 v3.00 EveryShipout Package (MS)
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex
 Package: pgfrcs 2015/08/07 v3.0.1a (rcs-revision 1.31)
 ))
 Package: pgf 2015/08/07 v3.0.1a (rcs-revision 1.15)
 
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex
 Package: pgfsys 2014/07/09 v3.0.1a (rcs-revision 1.48)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex
 \pgfkeys@pathtoks=\toks23
 \pgfkeys@temptoks=\toks24
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeysfiltered.code.t
 ex
 \pgfkeys@tmptoks=\toks25
 ))
 \pgf@x=\dimen144
 \pgf@y=\dimen145
 \pgf@xa=\dimen146
 \pgf@ya=\dimen147
 \pgf@xb=\dimen148
 \pgf@yb=\dimen149
 \pgf@xc=\dimen150
 \pgf@yc=\dimen151
 \w@pgf@writea=\write3
 \r@pgf@reada=\read1
 \c@pgf@counta=\count137
 \c@pgf@countb=\count138
 \c@pgf@countc=\count139
 \c@pgf@countd=\count140
 \t@pgf@toka=\toks26
 \t@pgf@tokb=\toks27
 \t@pgf@tokc=\toks28
  (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg
 File: pgf.cfg 2008/05/14  (rcs-revision 1.7)
 )
 Driver file for pgf: pgfsys-pdftex.def
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def
 File: pgfsys-pdftex.def 2014/10/11  (rcs-revision 1.35)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.de
 f
 File: pgfsys-common-pdf.def 2013/10/10  (rcs-revision 1.13)
 )))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.
 tex
 File: pgfsyssoftpath.code.tex 2013/09/09  (rcs-revision 1.9)
 \pgfsyssoftpath@smallbuffer@items=\count141
 \pgfsyssoftpath@bigbuffer@items=\count142
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.
 tex
 File: pgfsysprotocol.code.tex 2006/10/16  (rcs-revision 1.4)
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex
 Package: pgfcore 2010/04/11 v3.0.1a (rcs-revision 1.7)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex
 \pgfmath@dimen=\dimen152
 \pgfmath@count=\count143
 \pgfmath@box=\box31
 \pgfmath@toks=\toks29
 \pgfmath@stack@operand=\toks30
 \pgfmath@stack@operation=\toks31
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code
 .tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonomet
 ric.code.tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.cod
 e.tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison
 .code.tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.
 tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code
 .tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.
 tex)
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerari
 thmetics.code.tex)))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex
 \c@pgfmathroundto@lastzeros=\count144
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.te
 x
 File: pgfcorepoints.code.tex 2013/10/07  (rcs-revision 1.27)
 \pgf@picminx=\dimen153
 \pgf@picmaxx=\dimen154
 \pgf@picminy=\dimen155
 \pgf@picmaxy=\dimen156
 \pgf@pathminx=\dimen157
 \pgf@pathmaxx=\dimen158
 \pgf@pathminy=\dimen159
 \pgf@pathmaxy=\dimen160
 \pgf@xx=\dimen161
 \pgf@xy=\dimen162
 \pgf@yx=\dimen163
 \pgf@yy=\dimen164
 \pgf@zx=\dimen165
 \pgf@zy=\dimen166
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.
 code.tex
 File: pgfcorepathconstruct.code.tex 2013/10/07  (rcs-revision 1.29)
 \pgf@path@lastx=\dimen167
 \pgf@path@lasty=\dimen168
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code
 .tex
 File: pgfcorepathusage.code.tex 2014/11/02  (rcs-revision 1.24)
 \pgf@shorten@end@additional=\dimen169
 \pgf@shorten@start@additional=\dimen170
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.te
 x
 File: pgfcorescopes.code.tex 2015/05/08  (rcs-revision 1.46)
 \pgfpic=\box32
 \pgf@hbox=\box33
 \pgf@layerbox@main=\box34
 \pgf@picture@serial@count=\count145
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.c
 ode.tex
 File: pgfcoregraphicstate.code.tex 2014/11/02  (rcs-revision 1.12)
 \pgflinewidth=\dimen171
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformation
 s.code.tex
 File: pgfcoretransformations.code.tex 2015/08/07  (rcs-revision 1.20)
 \pgf@pt@x=\dimen172
 \pgf@pt@y=\dimen173
 \pgf@pt@temp=\dimen174
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex
 File: pgfcorequick.code.tex 2008/10/09  (rcs-revision 1.3)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.t
 ex
 File: pgfcoreobjects.code.tex 2006/10/11  (rcs-revision 1.2)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing
 .code.tex
 File: pgfcorepathprocessing.code.tex 2013/09/09  (rcs-revision 1.9)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.te
 x
 File: pgfcorearrows.code.tex 2015/05/14  (rcs-revision 1.43)
 \pgfarrowsep=\dimen175
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex
 File: pgfcoreshade.code.tex 2013/07/15  (rcs-revision 1.15)
 \pgf@max=\dimen176
 \pgf@sys@shading@range@num=\count146
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex
 File: pgfcoreimage.code.tex 2013/07/15  (rcs-revision 1.18)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.
 tex
 File: pgfcoreexternal.code.tex 2014/07/09  (rcs-revision 1.21)
 \pgfexternal@startupbox=\box35
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.te
 x
 File: pgfcorelayers.code.tex 2013/07/18  (rcs-revision 1.7)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.c
 ode.tex
 File: pgfcoretransparency.code.tex 2013/09/30  (rcs-revision 1.5)
 )
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.
 tex
 File: pgfcorepatterns.code.tex 2013/11/07  (rcs-revision 1.5)
 )))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex
 File: pgfmoduleshapes.code.tex 2014/03/21  (rcs-revision 1.35)
 \pgfnodeparttextbox=\box36
 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex
 File: pgfmoduleplot.code.tex 2015/08/03  (rcs-revision 1.13)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65
 .sty
 Package: pgfcomp-version-0-65 2007/07/03 v3.0.1a (rcs-revision 1.7)
 \pgf@nodesepstart=\dimen177
 \pgf@nodesepend=\dimen178
 )
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18
 .sty
 Package: pgfcomp-version-1-18 2007/07/23 v3.0.1a (rcs-revision 1.1)
 )) (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgffor.sty
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex))
 (/usr/share/texlive/texmf-dist/tex/latex/pgf/math/pgfmath.sty
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex
 Package: pgffor 2013/12/13 v3.0.1a (rcs-revision 1.25)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)
 \pgffor@iter=\dimen179
 \pgffor@skip=\dimen180
 \pgffor@stack=\toks32
 \pgffor@toks=\toks33
 ))
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex
 Package: tikz 2015/08/07 v3.0.1a (rcs-revision 1.151)
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers
 .code.tex
 File: pgflibraryplothandlers.code.tex 2013/08/31 v3.0.1a (rcs-revision 1.20)
 \pgf@plot@mark@count=\count147
 \pgfplotmarksize=\dimen181
 )
 \tikz@lastx=\dimen182
 \tikz@lasty=\dimen183
 \tikz@lastxsaved=\dimen184
 \tikz@lastysaved=\dimen185
 \tikzleveldistance=\dimen186
 \tikzsiblingdistance=\dimen187
 \tikz@figbox=\box37
 \tikz@figbox@bg=\box38
 \tikz@tempbox=\box39
 \tikz@tempbox@bg=\box40
 \tikztreelevel=\count148
 \tikznumberofchildren=\count149
 \tikznumberofcurrentchild=\count150
 \tikz@fig@count=\count151
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex
 File: pgfmodulematrix.code.tex 2013/09/17  (rcs-revision 1.8)
 \pgfmatrixcurrentrow=\count152
 \pgfmatrixcurrentcolumn=\count153
 \pgf@matrix@numberofcolumns=\count154
 )
 \tikz@expandcount=\count155
 
 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik
 zlibrarytopaths.code.tex
 File: tikzlibrarytopaths.code.tex 2008/06/17 v3.0.1a (rcs-revision 1.2)
 )))
 (/usr/share/texlive/texmf-dist/tex/latex/titlesec/titlesec.sty
 Package: titlesec 2016/03/21 v2.10.2 Sectioning titles
 \ttl@box=\box41
 \beforetitleunit=\skip69
 \aftertitleunit=\skip70
 \ttl@plus=\dimen188
 \ttl@minus=\dimen189
 \ttl@toksa=\toks34
 \titlewidth=\dimen190
 \titlewidthlast=\dimen191
 \titlewidthfirst=\dimen192
 )
 (/usr/share/texlive/texmf-dist/tex/latex/titlesec/ttlkeys.def
 File: ttlkeys.def 2016/03/15
 \c@ttlp@side=\count156
 \ttlp@side=\count157
 )
 \c@myparts=\count158
 
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty
 Package: amsmath 2017/09/02 v2.17a AMS math features
 \@mathmargin=\skip71
 
 For additional information on amsmath, use the `?' option.
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty
 Package: amstext 2000/06/29 v2.01 AMS text
 
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty
 File: amsgen.sty 1999/11/30 v2.0 generic functions
 \@emptytoks=\toks35
 \ex@=\dimen193
 ))
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty
 Package: amsbsy 1999/11/29 v1.2d Bold Symbols
 \pmbraise@=\dimen194
 )
 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty
 Package: amsopn 2016/03/08 v2.02 operator names
 )
 \inf@bad=\count159
 LaTeX Info: Redefining \frac on input line 213.
 \uproot@=\count160
 \leftroot@=\count161
 LaTeX Info: Redefining \overline on input line 375.
 \classnum@=\count162
 \DOTSCASE@=\count163
 LaTeX Info: Redefining \ldots on input line 472.
 LaTeX Info: Redefining \dots on input line 475.
 LaTeX Info: Redefining \cdots on input line 596.
 \Mathstrutbox@=\box42
 \strutbox@=\box43
 \big@size=\dimen195
 LaTeX Font Info:    Redeclaring font encoding OML on input line 712.
 LaTeX Font Info:    Redeclaring font encoding OMS on input line 713.
 \macc@depth=\count164
 \c@MaxMatrixCols=\count165
 \dotsspace@=\muskip11
 \c@parentequation=\count166
 \dspbrk@lvl=\count167
 \tag@help=\toks36
 \row@=\count168
 \column@=\count169
 \maxfields@=\count170
 \andhelp@=\toks37
 \eqnshift@=\dimen196
 \alignsep@=\dimen197
 \tagshift@=\dimen198
 \tagwidth@=\dimen199
 \totwidth@=\dimen256
 \lineht@=\dimen257
 \@envbody=\toks38
 \multlinegap=\skip72
 \multlinetaggap=\skip73
 \mathdisplay@stack=\toks39
 LaTeX Info: Redefining \[ on input line 2817.
 LaTeX Info: Redefining \] on input line 2818.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amsfonts.sty
 Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
 \symAMSa=\mathgroup6
 \symAMSb=\mathgroup7
 LaTeX Font Info:    Overwriting math alphabet `\mathfrak' in version `bold'
 (Font)                  U/euf/m/n --> U/euf/b/n on input line 106.
 LaTeX Font Info:    Redeclaring math symbol \square on input line 141.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amssymb.sty
 Package: amssymb 2013/01/14 v3.01 AMS font symbols
 LaTeX Font Info:    Redeclaring math symbol \blacksquare on input line 48.
 LaTeX Font Info:    Redeclaring math symbol \vDash on input line 60.
 LaTeX Font Info:    Redeclaring math symbol \leftleftarrows on input line 63.
 LaTeX Font Info:    Redeclaring math symbol \rightrightarrows on input line 64.
 
 LaTeX Font Info:    Redeclaring math symbol \leqslant on input line 101.
 LaTeX Font Info:    Redeclaring math symbol \geqslant on input line 108.
 LaTeX Font Info:    Redeclaring math symbol \blacktriangleright on input line 1
 20.
 LaTeX Font Info:    Redeclaring math symbol \blacktriangleleft on input line 12
 1.
 LaTeX Font Info:    Redeclaring math symbol \complement on input line 165.
 LaTeX Font Info:    Redeclaring math symbol \intercal on input line 166.
 LaTeX Font Info:    Redeclaring math symbol \nleqslant on input line 181.
 LaTeX Font Info:    Redeclaring math symbol \ngeqslant on input line 182.
 LaTeX Font Info:    Redeclaring math symbol \varsubsetneq on input line 203.
 LaTeX Font Info:    Redeclaring math symbol \subsetneqq on input line 207.
 LaTeX Font Info:    Redeclaring math symbol \nparallel on input line 215.
 LaTeX Font Info:    Redeclaring math symbol \nvDash on input line 221.
 LaTeX Font Info:    Redeclaring math symbol \nexists on input line 235.
 LaTeX Font Info:    Redeclaring math symbol \smallsetminus on input line 251.
 LaTeX Font Info:    Redeclaring math symbol \curvearrowleft on input line 257.
 LaTeX Font Info:    Redeclaring math symbol \curvearrowright on input line 258.
 
 LaTeX Font Info:    Redeclaring math symbol \varkappa on input line 260.
 LaTeX Font Info:    Redeclaring math symbol \hslash on input line 262.
 )
 (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mathtools.sty
 Package: mathtools 2018/01/08 v1.21 mathematical typesetting tools
 
 (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mhsetup.sty
 Package: mhsetup 2017/03/31 v1.3 programming setup (MH)
 )
 LaTeX Info: Thecontrolsequence`\('isalreadyrobust on input line 129.
 LaTeX Info: Thecontrolsequence`\)'isalreadyrobust on input line 129.
 LaTeX Info: Thecontrolsequence`\['isalreadyrobust on input line 129.
 LaTeX Info: Thecontrolsequence`\]'isalreadyrobust on input line 129.
 \g_MT_multlinerow_int=\count171
 \l_MT_multwidth_dim=\dimen258
 \origjot=\skip74
 \l_MT_shortvdotswithinadjustabove_dim=\dimen259
 \l_MT_shortvdotswithinadjustbelow_dim=\dimen260
 \l_MT_above_intertext_sep=\dimen261
 \l_MT_below_intertext_sep=\dimen262
 \l_MT_above_shortintertext_sep=\dimen263
 \l_MT_below_shortintertext_sep=\dimen264
 ))
 (./head/settings_custom.tex
 (/usr/share/texlive/texmf-dist/tex/latex/algorithm2e/algorithm2e.sty
 Package: algorithm2e 2017/07/18 v5.2 algorithms environments
 \c@AlgoLine=\count172
 \algocf@hangindent=\skip75
 
 (/usr/share/texlive/texmf-dist/tex/latex/ifoddpage/ifoddpage.sty
 Package: ifoddpage 2016/04/23 v1.1 Conditionals for odd/even page detection
 \c@checkoddpage=\count173
 )
 (/usr/share/texlive/texmf-dist/tex/latex/tools/xspace.sty
 Package: xspace 2014/10/28 v1.13 Space after command names (DPC,MH)
 )
 (/usr/share/texlive/texmf-dist/tex/latex/relsize/relsize.sty
 Package: relsize 2013/03/29 ver 4.1
 )
 \skiptotal=\skip76
 \skiplinenumber=\skip77
 \skiprule=\skip78
 \skiphlne=\skip79
 \skiptext=\skip80
 \skiplength=\skip81
 \algomargin=\skip82
 \skipalgocfslide=\skip83
 \algowidth=\dimen265
 \inoutsize=\dimen266
 \inoutindent=\dimen267
 \interspacetitleruled=\dimen268
 \interspacealgoruled=\dimen269
 \interspacetitleboxruled=\dimen270
 \algocf@ruledwidth=\skip84
 \algocf@inoutbox=\box44
 \algocf@inputbox=\box45
 \AlCapSkip=\skip85
 \AlCapHSkip=\skip86
 \algoskipindent=\skip87
 \algocf@nlbox=\box46
 \algocf@hangingbox=\box47
 \algocf@untilbox=\box48
 \algocf@skipuntil=\skip88
 \algocf@capbox=\box49
 \algocf@lcaptionbox=\skip89
 \algoheightruledefault=\skip90
 \algoheightrule=\skip91
 \algotitleheightruledefault=\skip92
 \algotitleheightrule=\skip93
 \c@algocfline=\count174
 \c@algocfproc=\count175
 \c@algocf=\count176
 \algocf@algoframe=\box50
 \algocf@algobox=\box51
 )
 (/usr/share/texlive/texmf-dist/tex/latex/float/float.sty
 Package: float 2001/11/08 v1.3d Float enhancements (AL)
 \c@float@type=\count177
 \float@exts=\toks40
 \float@box=\box52
 \@float@everytoks=\toks41
 \@floatcapt=\box53
 )
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/xr-hyper.sty
 Package: xr-hyper 2000/03/22 v6.00beta4 eXternal References (DPC)
 
 
 Package xr-hyper Warning: Load package `hyperref' after `xr-hyper'.
 
 ) (/usr/share/texlive/texmf-dist/tex/latex/makecell/makecell.sty
 Package: makecell 2009/08/03 V0.1e Managing of Tab Column Heads and Cells
 
 (/usr/share/texlive/texmf-dist/tex/latex/tools/array.sty
 Package: array 2016/10/06 v2.4d Tabular extension package (FMi)
 \col@sep=\dimen271
 \extrarowheight=\dimen272
 \NC@list=\toks42
 \extratabsurround=\skip94
 \backup@length=\skip95
 )
 \rotheadsize=\dimen273
 \c@nlinenum=\count178
 \TeXr@lab=\toks43
 )) (./my_thesis.aux
 (./head/dedication.aux) (./head/acknowledgements.aux) (./head/preface.aux)
 (./head/abstracts.aux) (./main/ch_introduction.aux)
 (./main/ch_group_projects.aux) (./main/ch_encode_peaks.aux)
 (./main/ch_smile-seq.aux) (./main/ch_atac-seq.aux
 
 LaTeX Warning: Label `encode_peaks_algo_ndr_extend' multiply defined.
 
 ) (./tail/appendix.aux
 
 LaTeX Warning: Label `suppl_emseq_sp1_10class' multiply defined.
 
 ) (./tail/biblio.aux) (./tail/cv.aux))
 \openout1 = `my_thesis.aux'.
 
 LaTeX Font Info:    Checking defaults for OML/cmm/m/it on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for T1/cmr/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for OT1/cmr/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for OMS/cmsy/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for OMX/cmex/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for U/cmr/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for FML/futm/m/it on input line 18.
 LaTeX Font Info:    Try loading font information for FML+futm on input line 18.
 
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutm.fd
 File: fmlfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futm.
 )
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for FMS/futm/m/n on input line 18.
 LaTeX Font Info:    Try loading font information for FMS+futm on input line 18.
 
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmsfutm.fd
 File: fmsfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FMS/futm.
 )
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for FMX/futm/m/n on input line 18.
 LaTeX Font Info:    Try loading font information for FMX+futm on input line 18.
 
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmxfutm.fd
 File: fmxfutm.fd futm-extension
 )
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for TS1/cmr/m/n on input line 18.
 LaTeX Font Info:    Try loading font information for TS1+cmr on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd
 File: ts1cmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions
 )
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Checking defaults for PD1/pdf/m/n on input line 18.
 LaTeX Font Info:    ... okay on input line 18.
 LaTeX Font Info:    Try loading font information for T1+futs on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/t1futs.fd
 File: t1futs.fd 2004/03/02 Fontinst v1.926 font definitions for T1/futs.
 )
 LaTeX Info: Redefining \degres on input line 18.
 LaTeX Info: Redefining \dots on input line 18.
 LaTeX Info: Redefining \up on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
 [Loading MPS to PDF converter (version 2006.09.02).]
 \scratchcounter=\count179
 \scratchdimen=\dimen274
 \scratchbox=\box54
 \nofMPsegments=\count180
 \nofMParguments=\count181
 \everyMPshowfont=\toks44
 \MPscratchCnt=\count182
 \MPscratchDim=\dimen275
 \MPnumerator=\count183
 \makeMPintoPDFobject=\count184
 \everyMPtoPDFconversion=\toks45
 ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty
 Package: epstopdf-base 2016/05/15 v2.6 Base part for package epstopdf
 
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/grfext.sty
 Package: grfext 2016/05/16 v1.2 Manage graphics extensions (HO)
 )
 Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4
 38.
 Package grfext Info: Graphics extension search list:
 (grfext)             [.pdf,.png,.jpg,.mps,.jpeg,.jbig2,.jb2,.PDF,.PNG,.JPG,.JPE
 G,.JBIG2,.JB2,.eps]
 (grfext)             \AppendGraphicsExtensions on input line 456.
 
 (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
 File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv
 e
 ))
 Package caption Info: Begin \AtBeginDocument code.
 Package caption Info: subfig package v1.3 is loaded.
 Package caption Info: float package is loaded.
 Package caption Info: hyperref package is loaded.
 Package caption Info: listings package is loaded.
 Package caption Info: End \AtBeginDocument code.
 LaTeX Info: Redefining \microtypecontext on input line 18.
 Package microtype Info: Generating PDF output.
 Package microtype Info: Character protrusion enabled (level 2).
 Package microtype Info: Using default protrusion set `alltext'.
 Package microtype Info: Automatic font expansion enabled (level 2),
 (microtype)             stretch: 20, shrink: 20, step: 1, non-selected.
 Package microtype Info: Using default expansion set `basictext'.
 Package microtype Info: No adjustment of tracking.
 Package microtype Info: No adjustment of interword spacing.
 Package microtype Info: No adjustment of character kerning.
 Package microtype Info: Loading generic protrusion settings for font family
 (microtype)             `futs' (encoding: T1).
 (microtype)             For optimal results, create family-specific settings.
 (microtype)             See the microtype manual for details.
 \c@lstlisting=\count185
 \AtBeginShipoutBox=\box55
 Package hyperref Info: Link coloring ON on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/nameref.sty
 Package: nameref 2016/05/21 v2.44 Cross-referencing by name of section
 
 (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/gettitlestring.sty
 Package: gettitlestring 2016/05/16 v1.5 Cleanup title references (HO)
 )
 \c@section@level=\count186
 )
 LaTeX Info: Redefining \ref on input line 18.
 LaTeX Info: Redefining \pageref on input line 18.
 LaTeX Info: Redefining \nameref on input line 18.
 
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/pdflscape.sty
 Package: pdflscape 2016/05/14 v0.11 Display of landscape pages in PDF (HO)
 
 (/usr/share/texlive/texmf-dist/tex/latex/graphics/lscape.sty
 Package: lscape 2000/10/22 v3.01 Landscape Pages (DPC)
 )
 Package pdflscape Info: Auto-detected driver: pdftex on input line 81.
 )
 ABD: EveryShipout initializing macros (./head/titlepage.tex
 LaTeX Font Info:    Try loading font information for T1+lmss on input line 5.
 
 (/usr/share/texmf/tex/latex/lm/t1lmss.fd
 File: t1lmss.fd 2009/10/30 v1.6 Font defs for Latin Modern
 )
 Package microtype Info: Loading generic protrusion settings for font family
 (microtype)             `lmss' (encoding: T1).
 (microtype)             For optimal results, create family-specific settings.
 (microtype)             See the microtype manual for details.
 LaTeX Font Info:    Try loading font information for FML+futmi on input line 14
 .
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutmi.fd
 File: fmlfutmi.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futmi.
 )
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 10.07397pt on input line 14.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 7.63599pt on input line 14.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 5.51999pt on input line 14.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 10.07397pt on input line 14.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 7.63599pt on input line 14.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 5.51999pt on input line 14.
 LaTeX Font Info:    Try loading font information for U+msa on input line 14.
 
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsa.fd
 File: umsa.fd 2013/01/14 v3.01 AMS symbols A
 )
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msa.cfg
 File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS)
 )
 LaTeX Font Info:    Try loading font information for U+msb on input line 14.
 
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsb.fd
 File: umsb.fd 2013/01/14 v3.01 AMS symbols B
 )
 (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msb.cfg
 File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS)
 )
 <images/epfl.pdf, id=7, 612.2875pt x 265.99374pt>
 File: images/epfl.pdf Graphic file (type pdf)
 <use images/epfl.pdf>
 Package pdftex.def Info: images/epfl.pdf  used on input line 15.
 (pdftex.def)             Requested size: 113.81102pt x 49.4394pt.
 
 Overfull \hbox (23.99998pt too wide) in paragraph at lines 14--41
  [][] 
  []
 
 [1
 
 
 
 
 
 {/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} <./images/epfl.pdf>])
 \openout2 = `head/dedication.aux'.
 
 
 (./head/dedication.tex [2
 
 
 ]) [3]
 \openout2 = `head/acknowledgements.aux'.
 
  (./head/acknowledgements.tex [0
 
 
 
 ]
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <10.95> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 1.
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <24.88> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 1.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 22.88956pt on input line 1.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 15.89755pt on input line 1.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 11.03998pt on input line 1.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 22.88956pt on input line 1.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 15.89755pt on input line 1.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 11.03998pt on input line 1.
 )pdfTeX warning (ext4): destination with the same identifier (name{page.i}) has
  been already used, duplicate ignored
 <to be read again> 
                    \relax 
 l.25 \include{head/acknowledgements}
                                      [1]
 \openout2 = `head/preface.aux'.
 
  (./head/preface.texpdfTeX warning (ext4): destination with the same identifier
  (name{page.ii}) has been already used, duplicate ignored
 <to be read again> 
                    \relax 
 l.1 \cleardoublepage
                      [2
 
 
 
 ])pdfTeX warning (ext4): destination with the same identifier (name{page.iii}) 
 has been already used, duplicate ignored
 <to be read again> 
                    \relax 
 l.26 \include{head/preface}
                             [3
 
 ]
 \openout2 = `head/abstracts.aux'.
 
  (./head/abstracts.tex [4
 
 
 
 ]
 Package babel Info: Redefining german shorthand "f
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "|
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "~
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "f
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "|
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "~
 (babel)             in language  on input line 18.
  [5
 
 ]
 Package babel Info: Redefining german shorthand "f
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "|
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "~
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "f
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "|
 (babel)             in language  on input line 18.
 Package babel Info: Redefining german shorthand "~
 (babel)             in language  on input line 18.
  [6
 
 ] [7
 
 ] [8
 
 ])
 [9
 
 ] [10
 
 
 ] (./my_thesis.toc [11
 
 ] [12])
 \tf@toc=\write4
 \openout4 = `my_thesis.toc'.
 
  [13] [14
 
 ]
 \openout2 = `main/ch_introduction.aux'.
 
  (./main/ch_introduction.tex)
 [1
 
 
 
 ] [2
 
 
 ]
 \openout2 = `main/ch_group_projects.aux'.
 
  (./main/ch_group_projects.tex
 Chapter 1.
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <14.4> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 12.
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <12> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 17.
-<images/ch_group_projects/mga_figure1.jpeg, id=278, 500.28908pt x 167.6664pt>
+<images/ch_group_projects/mga_figure1.jpeg, id=274, 500.28908pt x 167.6664pt>
 File: images/ch_group_projects/mga_figure1.jpeg Graphic file (type jpg)
 <use images/ch_group_projects/mga_figure1.jpeg>
 Package pdftex.def Info: images/ch_group_projects/mga_figure1.jpeg  used on inp
 ut line 27.
 (pdftex.def)             Requested size: 400.23181pt x 134.13329pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [3
 
 
 
 ]
 [4 <./images/ch_group_projects/mga_figure1.jpeg>] [5]
-<images/ch_group_projects/epd_figure1.jpeg, id=359, 239.03302pt x 194.22563pt>
+<images/ch_group_projects/epd_figure1.jpeg, id=355, 239.03302pt x 194.22563pt>
 File: images/ch_group_projects/epd_figure1.jpeg Graphic file (type jpg)
 <use images/ch_group_projects/epd_figure1.jpeg>
 Package pdftex.def Info: images/ch_group_projects/epd_figure1.jpeg  used on inp
 ut line 72.
 (pdftex.def)             Requested size: 215.12772pt x 174.80144pt.
 
 
 LaTeX Warning: Reference `L' on page 6 undefined on input line 73.
 
 
 LaTeX Warning: Reference `L' on page 6 undefined on input line 73.
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [6
 
  <./images/ch_group_projects/epd_figure1.jpeg>]
-<images/ch_group_projects/epd_motifs.png, id=388, 1156.32pt x 578.16pt>
+<images/ch_group_projects/epd_motifs.png, id=384, 1156.32pt x 578.16pt>
 File: images/ch_group_projects/epd_motifs.png Graphic file (type png)
 <use images/ch_group_projects/epd_motifs.png>
 Package pdftex.def Info: images/ch_group_projects/epd_motifs.png  used on input
  line 124.
 (pdftex.def)             Requested size: 346.89868pt x 173.44933pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [7]
 [8]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [9 <./images/ch_group_projects/epd_motifs.png (PNG copy)>] [10]
-<images/ch_group_projects/pwmscan_flowchart.png, id=435, 2792.031pt x 3705.042p
+<images/ch_group_projects/pwmscan_flowchart.png, id=431, 2792.031pt x 3705.042p
 t>
 File: images/ch_group_projects/pwmscan_flowchart.png Graphic file (type png)
 <use images/ch_group_projects/pwmscan_flowchart.png>
 Package pdftex.def Info: images/ch_group_projects/pwmscan_flowchart.png  used o
 n input line 178.
 (pdftex.def)             Requested size: 279.21945pt x 370.52591pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [11
 
 ]
 [12 <./images/ch_group_projects/pwmscan_flowchart.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [13]
-<images/ch_group_projects/pwmscan_figure_s1.png, id=484, 1348.03625pt x 768.872
+<images/ch_group_projects/pwmscan_figure_s1.png, id=480, 1348.03625pt x 768.872
 5pt>
 File: images/ch_group_projects/pwmscan_figure_s1.png Graphic file (type png)
 <use images/ch_group_projects/pwmscan_figure_s1.png>
 Package pdftex.def Info: images/ch_group_projects/pwmscan_figure_s1.png  used o
 n input line 222.
 (pdftex.def)             Requested size: 269.60248pt x 153.77177pt.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 8.27998pt on input line 235.
 LaTeX Font Info:    Font shape `FMX/futm/m/n' will be
 (Font)              scaled to size 6.99199pt on input line 235.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 8.27998pt on input line 235.
 LaTeX Font Info:    Font shape `U/futm/m/n' will be
 (Font)              scaled to size 6.99199pt on input line 235.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [14 <./images/ch_group_projects/pwmscan_figure_s1.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [15]
 [16]
-<images/ch_group_projects/spark_figure1.pdf, id=548, 1011.78pt x 578.16pt>
+<images/ch_group_projects/spark_figure1.pdf, id=544, 1011.78pt x 578.16pt>
 File: images/ch_group_projects/spark_figure1.pdf Graphic file (type pdf)
 <use images/ch_group_projects/spark_figure1.pdf>
 Package pdftex.def Info: images/ch_group_projects/spark_figure1.pdf  used on in
 put line 311.
 (pdftex.def)             Requested size: 404.70483pt x 231.2599pt.
 
 
 LaTeX Warning: Reference `fig_s07' on page 17 undefined on input line 313.
 
 
 LaTeX Warning: Reference `fig_s07' on page 17 undefined on input line 313.
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [17
 
 ]
 [18 <./images/ch_group_projects/spark_figure1.pdf>]
-<images/ch_group_projects/spark_supplemental_figure2.pdf, id=575, 1156.32pt x 5
+<images/ch_group_projects/spark_supplemental_figure2.pdf, id=571, 1156.32pt x 5
 05.89pt>
 File: images/ch_group_projects/spark_supplemental_figure2.pdf Graphic file (typ
 e pdf)
 <use images/ch_group_projects/spark_supplemental_figure2.pdf>
 Package pdftex.def Info: images/ch_group_projects/spark_supplemental_figure2.pd
 f  used on input line 326.
 (pdftex.def)             Requested size: 462.5198pt x 202.3524pt.
-<images/ch_group_projects/spark_supplemental_figure4.pdf, id=580, 722.7pt x 433
+<images/ch_group_projects/spark_supplemental_figure4.pdf, id=576, 722.7pt x 433
 .62pt>
 File: images/ch_group_projects/spark_supplemental_figure4.pdf Graphic file (typ
 e pdf)
 <use images/ch_group_projects/spark_supplemental_figure4.pdf>
 Package pdftex.def Info: images/ch_group_projects/spark_supplemental_figure4.pd
 f  used on input line 334.
 (pdftex.def)             Requested size: 289.07487pt x 173.44492pt.
-<images/ch_group_projects/spark_supplemental_figure5.pdf, id=585, 722.7pt x 433
+<images/ch_group_projects/spark_supplemental_figure5.pdf, id=581, 722.7pt x 433
 .62pt>
 File: images/ch_group_projects/spark_supplemental_figure5.pdf Graphic file (typ
 e pdf)
 <use images/ch_group_projects/spark_supplemental_figure5.pdf>
 Package pdftex.def Info: images/ch_group_projects/spark_supplemental_figure5.pd
 f  used on input line 342.
 (pdftex.def)             Requested size: 289.07487pt x 173.44492pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [19 <./images/ch_group_projects/spark_supplemental_figure2.pdf>] [20 <./images
 /ch_group_projects/spark_supplemental_figure4.pdf> <./images/ch_group_projects/
 spark_supplemental_figure5.pdf>]) [21]
 \openout2 = `main/ch_encode_peaks.aux'.
 
  (./main/ch_encode_peaks.tex [22
 
 
 
 ]
 Chapter 2.
-<images/ch_encode_peaks/peaklist_peaknumber_GM12878.png, id=654, 1734.48pt x 57
+<images/ch_encode_peaks/peaklist_peaknumber_GM12878.png, id=650, 1734.48pt x 57
 8.16pt>
 File: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png Graphic file (type
  png)
 <use images/ch_encode_peaks/peaklist_peaknumber_GM12878.png>
 Package pdftex.def Info: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png
   used on input line 26.
 (pdftex.def)             Requested size: 520.34802pt x 173.44933pt.
 
 Overfull \hbox (102.66156pt too wide) in paragraph at lines 26--27
  [] 
  []
 
-<images/ch_encode_peaks/peaklist_proportions_GM12878.png, id=659, 1734.48pt x 5
+<images/ch_encode_peaks/peaklist_proportions_GM12878.png, id=655, 1734.48pt x 5
 78.16pt>
 File: images/ch_encode_peaks/peaklist_proportions_GM12878.png Graphic file (typ
 e png)
 <use images/ch_encode_peaks/peaklist_proportions_GM12878.png>
 Package pdftex.def Info: images/ch_encode_peaks/peaklist_proportions_GM12878.pn
 g  used on input line 35.
 (pdftex.def)             Requested size: 520.34802pt x 173.44933pt.
 
 Overfull \hbox (102.66156pt too wide) in paragraph at lines 35--36
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [23
 
 ]
 [24 <./images/ch_encode_peaks/peaklist_peaknumber_GM12878.png (PNG copy)> <./im
 ages/ch_encode_peaks/peaklist_proportions_GM12878.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [25]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [26]
-<images/ch_encode_peaks/MNase_profiles.png, id=729, 1508.63625pt x 1038.279pt>
+<images/ch_encode_peaks/MNase_profiles.png, id=724, 1508.63625pt x 1038.279pt>
 File: images/ch_encode_peaks/MNase_profiles.png Graphic file (type png)
 <use images/ch_encode_peaks/MNase_profiles.png>
 Package pdftex.def Info: images/ch_encode_peaks/MNase_profiles.png  used on inp
 ut line 81.
 (pdftex.def)             Requested size: 377.15814pt x 259.5691pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [27]
 [28 <./images/ch_encode_peaks/MNase_profiles.png>]
-<images/ch_encode_peaks/colocalization_ctcf.png, id=762, 1612.8255pt x 1282.190
+<images/ch_encode_peaks/colocalization_ctcf.png, id=757, 1612.8255pt x 1282.190
 25pt>
 File: images/ch_encode_peaks/colocalization_ctcf.png Graphic file (type png)
 <use images/ch_encode_peaks/colocalization_ctcf.png>
 Package pdftex.def Info: images/ch_encode_peaks/colocalization_ctcf.png  used o
 n input line 108.
 (pdftex.def)             Requested size: 403.20538pt x 320.54678pt.
-<images/ch_encode_peaks/CTCF_ndr_length_rad212.png, id=763, 1011.78pt x 578.16p
+<images/ch_encode_peaks/CTCF_ndr_length_rad212.png, id=758, 1011.78pt x 578.16p
 t>
 File: images/ch_encode_peaks/CTCF_ndr_length_rad212.png Graphic file (type png)
 
 <use images/ch_encode_peaks/CTCF_ndr_length_rad212.png>
 Package pdftex.def Info: images/ch_encode_peaks/CTCF_ndr_length_rad212.png  use
 d on input line 116.
 (pdftex.def)             Requested size: 404.70483pt x 231.2599pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [29]
 [30 <./images/ch_encode_peaks/colocalization_ctcf.png>] [31 <./images/ch_encode
 _peaks/CTCF_ndr_length_rad212.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [32]
-<images/ch_encode_peaks/TF_associations.png, id=812, 600.35721pt x 381.48236pt>
+<images/ch_encode_peaks/TF_associations.png, id=807, 600.35721pt x 381.48236pt>
 
 File: images/ch_encode_peaks/TF_associations.png Graphic file (type png)
 <use images/ch_encode_peaks/TF_associations.png>
 Package pdftex.def Info: images/ch_encode_peaks/TF_associations.png  used on in
 put line 145.
 (pdftex.def)             Requested size: 240.13863pt x 152.59023pt.
-<images/ch_encode_peaks/ctcf_motif_association.png, id=813, 1084.05pt x 847.968
+<images/ch_encode_peaks/ctcf_motif_association.png, id=808, 1084.05pt x 847.968
 pt>
 File: images/ch_encode_peaks/ctcf_motif_association.png Graphic file (type png)
 
 <use images/ch_encode_peaks/ctcf_motif_association.png>
 Package pdftex.def Info: images/ch_encode_peaks/ctcf_motif_association.png  use
 d on input line 153.
 (pdftex.def)             Requested size: 433.61232pt x 339.18118pt.
 
 Overfull \hbox (15.92586pt too wide) in paragraph at lines 153--154
  [] 
  []
 
 
 LaTeX Warning: Float too large for page by 31.83305pt on input line 209.
 
 [33 <./images/ch_encode_peaks/TF_associations.png>] [34 <./images/ch_encode_pea
 ks/ctcf_motif_association.png>] [35]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [36]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [37]
-<images/ch_encode_peaks/ebf1_haib_1.png, id=902, 650.43pt x 867.24pt>
+<images/ch_encode_peaks/ebf1_haib_1.png, id=897, 650.43pt x 867.24pt>
 File: images/ch_encode_peaks/ebf1_haib_1.png Graphic file (type png)
 <use images/ch_encode_peaks/ebf1_haib_1.png>
 Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_1.png  used on input 
 line 234.
 (pdftex.def)             Requested size: 260.16739pt x 346.88986pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [38]
 [39 <./images/ch_encode_peaks/ebf1_haib_1.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [40]
 
 LaTeX Warning: Reference `https://ccg.epfl.ch/mga/hg19/phastcons/phastcons.html
 ' on page 41 undefined on input line 280.
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [41]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [42]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [43]
 LaTeX Font Info:    Font shape `T1/futs/bx/n' in size <8> not available
 (Font)              Font shape `T1/futs/b/n' tried instead on input line 411.
 LaTeX Font Info:    Try loading font information for T1+lmtt on input line 411.
 
 
 (/usr/share/texmf/tex/latex/lm/t1lmtt.fd
 File: t1lmtt.fd 2009/10/30 v1.6 Font defs for Latin Modern
 )
 Package microtype Info: Loading generic protrusion settings for font family
 (microtype)             `lmtt' (encoding: T1).
 (microtype)             For optimal results, create family-specific settings.
 (microtype)             See the microtype manual for details.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [44]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [45]
 Overfull \hbox (9.9085pt too wide) in paragraph at lines 434--435
 \T1/futs/m/n/10.95 (-20) ences were the cor-rected EBF1 peaks (wgEn-codeAwgTf-b
 -sHaibGm12878Ebf1sc137065Pcr1xUniPk
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [46])
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [47]
 [48]
 \openout2 = `main/ch_smile-seq.aux'.
 
  (./main/ch_smile-seq.tex
 Chapter 3.
-<images/ch_smile-seq/figure1.jpg, id=1095, 929.4725pt x 1206.5075pt>
+<images/ch_smile-seq/figure1.jpg, id=1089, 929.4725pt x 1206.5075pt>
 File: images/ch_smile-seq/figure1.jpg Graphic file (type jpg)
 <use images/ch_smile-seq/figure1.jpg>
 Package pdftex.def Info: images/ch_smile-seq/figure1.jpg  used on input line 23
 .
 (pdftex.def)             Requested size: 232.36755pt x 301.62613pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [49
 
 
 
 
 ]
 [50 <./images/ch_smile-seq/figure1.jpg>]
-<images/ch_smile-seq/figure_hmm.png, id=1130, 2081.16266pt x 1075.51627pt>
+<images/ch_smile-seq/figure_hmm.png, id=1125, 2081.16266pt x 1075.51627pt>
 File: images/ch_smile-seq/figure_hmm.png Graphic file (type png)
 <use images/ch_smile-seq/figure_hmm.png>
 Package pdftex.def Info: images/ch_smile-seq/figure_hmm.png  used on input line
  41.
 (pdftex.def)             Requested size: 416.22516pt x 215.09944pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [51 <./images/ch_smile-seq/figure_hmm.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [52]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [53]
-<images/ch_smile-seq/figure2b_3a.png, id=1161, 1994.652pt x 834.11626pt>
+<images/ch_smile-seq/figure2b_3a.png, id=1156, 1994.652pt x 834.11626pt>
 File: images/ch_smile-seq/figure2b_3a.png Graphic file (type png)
 <use images/ch_smile-seq/figure2b_3a.png>
 Package pdftex.def Info: images/ch_smile-seq/figure2b_3a.png  used on input lin
 e 119.
 (pdftex.def)             Requested size: 398.92334pt x 166.8203pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [54]
 [55 <./images/ch_smile-seq/figure2b_3a.png>]) [56]
 \openout2 = `main/ch_atac-seq.aux'.
 
  (./main/ch_atac-seq.tex
 Chapter 4.
 Package hyperref Info: bookmark level for unknown toc defaults to 0 on input li
 ne 5.
-<images/ch_atac-seq/ATAC-seq2.png, id=1217, 335.00156pt x 459.21562pt>
+<images/ch_atac-seq/ATAC-seq2.png, id=1212, 335.00156pt x 459.21562pt>
 File: images/ch_atac-seq/ATAC-seq2.png Graphic file (type png)
 <use images/ch_atac-seq/ATAC-seq2.png>
-Package pdftex.def Info: images/ch_atac-seq/ATAC-seq2.png  used on input line 1
-8.
+Package pdftex.def Info: images/ch_atac-seq/ATAC-seq2.png  used on input line 2
+0.
 (pdftex.def)             Requested size: 234.49948pt x 321.44873pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [57
 
 
 
 
 ]
 [58 <./images/ch_atac-seq/ATAC-seq2.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [59]
-<images/ch_atac-seq/pipeline.png, id=1270, 552.56438pt x 429.85594pt>
-File: images/ch_atac-seq/pipeline.png Graphic file (type png)
-<use images/ch_atac-seq/pipeline.png>
-Package pdftex.def Info: images/ch_atac-seq/pipeline.png  used on input line 57
-.
-(pdftex.def)             Requested size: 276.28151pt x 214.92744pt.
-
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
  [60]
-[61 <./images/ch_atac-seq/pipeline.png>]
-Underfull \hbox (badness 10000) in paragraph at lines 75--76
+Underfull \hbox (badness 10000) in paragraph at lines 81--82
 \T1/futs/m/n/10.95 (-12) com / 10x . ^^\les / samples / cell-[]atac / 1 . 1 . 0
  / atac _ v1 _ pbmc _ 5k / atac _ v1 _ pbmc _ 5k _ possorted _ bam .
  []
 
-
-Underfull \vbox (badness 10000) has occurred while \output is active []
-
- [62]
-<images/ch_atac-seq/em.png, id=1313, 2953.96512pt x 1933.64929pt>
+<images/ch_atac-seq/em.png, id=1278, 2953.96512pt x 1933.64929pt>
 File: images/ch_atac-seq/em.png Graphic file (type png)
 <use images/ch_atac-seq/em.png>
-Package pdftex.def Info: images/ch_atac-seq/em.png  used on input line 87.
+Package pdftex.def Info: images/ch_atac-seq/em.png  used on input line 105.
 (pdftex.def)             Requested size: 295.41382pt x 193.37625pt.
 
-[63 <./images/ch_atac-seq/em.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [64]
+ [61]
+[62 <./images/ch_atac-seq/em.png>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [65]
+ [63]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [66]
+ [64]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [67]
-<images/ch_atac-seq/fragment_lengths.png, id=1372, 1445.4pt x 433.62pt>
+ [65]
+<images/ch_atac-seq/fragment_lengths.png, id=1336, 1445.4pt x 433.62pt>
 File: images/ch_atac-seq/fragment_lengths.png Graphic file (type png)
 <use images/ch_atac-seq/fragment_lengths.png>
 Package pdftex.def Info: images/ch_atac-seq/fragment_lengths.png  used on input
- line 250.
+ line 257.
 (pdftex.def)             Requested size: 433.62335pt x 130.087pt.
 
-Overfull \hbox (15.93689pt too wide) in paragraph at lines 250--251
+Overfull \hbox (15.93689pt too wide) in paragraph at lines 257--258
  [] 
  []
 
-<images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png, id=1373, 867.24pt x 650
+<images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png, id=1337, 867.24pt x 650
 .43pt>
 File: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png Graphic file (type 
 png)
 <use images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png>
 Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png 
- used on input line 259.
+ used on input line 266.
 (pdftex.def)             Requested size: 346.88986pt x 260.16739pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [68 <./images/ch_atac-seq/fragment_lengths.png (PNG copy)>] [69 <./images/ch_a
-tac-seq/ctcf_motifs_10e-6_aggregations.png (PNG copy)>]
-<images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png, id=1403, 1300.86pt x 650.4
+ [66]
+[67 <./images/ch_atac-seq/fragment_lengths.png (PNG copy)>] [68 <./images/ch_at
+ac-seq/ctcf_motifs_10e-6_aggregations.png (PNG copy)>]
+<images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png, id=1372, 1300.86pt x 650.4
 3pt>
 File: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png Graphic file (type png
 )
 <use images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png>
 Package pdftex.def Info: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png  us
-ed on input line 279.
+ed on input line 286.
 (pdftex.def)             Requested size: 390.26102pt x 195.1305pt.
- [70 <./images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png (PNG copy)>]
+ [69 <./images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png (PNG copy)>]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [71]
-<images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png, id=1431, 867.2
+ [70]
+<images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png, id=1400, 867.2
 4pt x 433.62pt>
 File: images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png Graphic fi
 le (type png)
 <use images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png>
 Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_auc
-_roc.png  used on input line 318.
+_roc.png  used on input line 325.
 (pdftex.def)             Requested size: 346.88986pt x 173.44492pt.
-<images/ch_atac-seq/sp1_motifs_7class.png, id=1432, 1300.86pt x 867.24pt>
+<images/ch_atac-seq/sp1_motifs_7class.png, id=1401, 1300.86pt x 867.24pt>
 File: images/ch_atac-seq/sp1_motifs_7class.png Graphic file (type png)
 <use images/ch_atac-seq/sp1_motifs_7class.png>
 Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_7class.png  used on inpu
-t line 326.
+t line 333.
 (pdftex.def)             Requested size: 455.30783pt x 303.53854pt.
 
-Overfull \hbox (37.62137pt too wide) in paragraph at lines 326--327
+Overfull \hbox (37.62137pt too wide) in paragraph at lines 333--334
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [72]
-Underfull \vbox (badness 5403) has occurred while \output is active []
-
- [73 <./images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png (PNG cop
-y)>]
-[74 <./images/ch_atac-seq/sp1_motifs_7class.png (PNG copy)>]
-<images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png, id=1463, 1300.86pt x 5
+ [71]
+[72 <./images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png (PNG copy
+)>] [73 <./images/ch_atac-seq/sp1_motifs_7class.png (PNG copy)>]
+<images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png, id=1433, 1300.86pt x 5
 78.16pt>
 File: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png Graphic file (type
  png)
 <use images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png>
 Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png
-  used on input line 349.
+  used on input line 356.
 (pdftex.def)             Requested size: 390.26102pt x 173.44933pt.
-<images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png, id=1464, 1300.86pt x 578
+<images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png, id=1434, 1300.86pt x 578
 .16pt>
 File: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png Graphic file (type p
 ng)
 <use images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png>
 Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png  
-used on input line 357.
+used on input line 364.
 (pdftex.def)             Requested size: 455.30783pt x 202.35902pt.
 
-Overfull \hbox (37.62137pt too wide) in paragraph at lines 357--358
+Overfull \hbox (37.62137pt too wide) in paragraph at lines 364--365
  [] 
  []
 
-
-Underfull \vbox (badness 10000) has occurred while \output is active []
-
- [75]
-[76 <./images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png (PNG copy)> <./im
-ages/ch_atac-seq/ctcf_motifs_6class_shift_flip.png (PNG copy)>]
-<images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png, id=1491, 1300.8
+[74 <./images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png (PNG copy)>]
+[75 <./images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png (PNG copy)>]
+<images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png, id=1457, 1300.8
 6pt x 867.24pt>
 File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png Graphic fil
 e (type png)
 <use images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png>
 Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas
-s_2.png  used on input line 379.
+s_2.png  used on input line 386.
 (pdftex.def)             Requested size: 390.26102pt x 260.17401pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [77]
-[78 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png (PNG copy)
+ [76]
+[77 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png (PNG copy)
 >]
 
-LaTeX Warning: Reference `berest_quantification_2018' on page 79 undefined on i
-nput line 395.
+LaTeX Warning: Reference `berest_quantification_2018' on page 78 undefined on i
+nput line 402.
 
-<images/ch_atac-seq/data_classCTCF_8class.png, id=1511, 1445.4pt x 722.7pt>
+<images/ch_atac-seq/data_classCTCF_8class.png, id=1478, 1445.4pt x 722.7pt>
 File: images/ch_atac-seq/data_classCTCF_8class.png Graphic file (type png)
 <use images/ch_atac-seq/data_classCTCF_8class.png>
 Package pdftex.def Info: images/ch_atac-seq/data_classCTCF_8class.png  used on 
-input line 404.
+input line 411.
 (pdftex.def)             Requested size: 433.62335pt x 216.81166pt.
 
-Overfull \hbox (15.93689pt too wide) in paragraph at lines 404--405
+Overfull \hbox (15.93689pt too wide) in paragraph at lines 411--412
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [79]
-[80 <./images/ch_atac-seq/data_classCTCF_8class.png (PNG copy)>]
+ [78]
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [81]
-LaTeX Font Info:    Try loading font information for TS1+futs on input line 444
+ [79 <./images/ch_atac-seq/data_classCTCF_8class.png (PNG copy)>]
+Underfull \vbox (badness 10000) has occurred while \output is active []
+
+ [80]
+LaTeX Font Info:    Try loading font information for TS1+futs on input line 451
 .
 
 (/usr/share/texlive/texmf-dist/tex/latex/fourier/ts1futs.fd
 File: ts1futs.fd 2004/03/26 Fontinst v1.926 font definitions for TS1/futs.
 )
 Package microtype Info: Loading generic protrusion settings for font family
 (microtype)             `futs' (encoding: TS1).
 (microtype)             For optimal results, create family-specific settings.
 (microtype)             See the microtype manual for details.
- [82])
+ [81]
+Underfull \hbox (badness 3343) in paragraph at lines 493--493
+\T1/futs/m/n/10.95 (+20) FOSL2, JUNB, JUN::JUNB, FOSL1::JUND, FOS::JUN,
+ []
+
+
+Overfull \hbox (6.68097pt too wide) in paragraph at lines 485--516
+ [] 
+ []
+
+
+Underfull \vbox (badness 10000) has occurred while \output is active []
+
+ [82]
+[83])
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [83]
+ [84]
 Overfull \vbox (45.20699pt too high) has occurred while \output is active []
 
 
-[84]
+[85] [86
+
+
+]
 \openout2 = `tail/appendix.aux'.
 
  (./tail/appendix.tex
 Appendix A.
-<images/ch_smile-seq/figure_s4_reproduced.png, id=1605, 1413.53291pt x 783.5889
+<images/ch_smile-seq/figure_s4_reproduced.png, id=1598, 1413.53291pt x 783.5889
 pt>
 File: images/ch_smile-seq/figure_s4_reproduced.png Graphic file (type png)
 <use images/ch_smile-seq/figure_s4_reproduced.png>
 Package pdftex.def Info: images/ch_smile-seq/figure_s4_reproduced.png  used on 
 input line 13.
 (pdftex.def)             Requested size: 424.06316pt x 235.07848pt.
 
 Overfull \hbox (6.3767pt too wide) in paragraph at lines 13--14
  [] 
  []
 
 <images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_allpeak
-s_EM_4class_15shift_flip.png, id=1610, 602.25pt x 903.375pt>
+s_EM_4class_15shift_flip.png, id=1603, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_al
 lpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_all
 peaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUni
 Pk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on input line 31.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 
 Underfull \vbox (badness 5652) has occurred while \output is active []
 
- [85
-
-
+ [87
 
 
  <./images/ch_smile-seq/figure_s4_reproduced.png>]
 <images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_GM12878
-_allpeaks_EM_4class_15shift_flip.png, id=1617, 602.25pt x 903.375pt>
+_allpeaks_EM_4class_15shift_flip.png, id=1609, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_GM
 12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_GM1
 2878_allpeaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1I
 ggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on input lin
 e 39.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [86 <./images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_
+ [88 <./images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_
 allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
 <images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM12878_allpe
-aks_EM_4class_15shift_flip.png, id=1622, 602.25pt x 903.375pt>
+aks_EM_4class_15shift_flip.png, id=1614, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM12878_
 allpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM12878_a
 llpeaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosU
 niPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on input line 47.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [87 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_
+ [89 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_
 GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
 <images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_GM12878_
-allpeaks_EM_4class_15shift_flip.png, id=1627, 602.25pt x 903.375pt>
+allpeaks_EM_4class_15shift_flip.png, id=1619, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_GM1
 2878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_GM12
 878_allpeaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIg
 gmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on input line
  55.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [88 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM1287
+ [90 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM1287
 8_allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
 <images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_MNase_GM
-12878_allpeaks_EM_4class_15shift_flip.png, id=1632, 602.25pt x 903.375pt>
+12878_allpeaks_EM_4class_15shift_flip.png, id=1624, 602.25pt x 903.375pt>
 File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_MNa
 se_GM12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png)
 <use images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_MNas
 e_GM12878_allpeaks_EM_4class_15shift_flip.png>
 Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1
 a300IggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png  used on inpu
 t line 63.
 (pdftex.def)             Requested size: 301.12425pt x 451.6864pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [89 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_G
+ [91 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_G
 M12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
-<images/ch_encode_peaks/ctcf_ndr.png, id=1637, 433.62pt x 578.16pt>
+<images/ch_encode_peaks/ctcf_ndr.png, id=1630, 433.62pt x 578.16pt>
 File: images/ch_encode_peaks/ctcf_ndr.png Graphic file (type png)
 <use images/ch_encode_peaks/ctcf_ndr.png>
 Package pdftex.def Info: images/ch_encode_peaks/ctcf_ndr.png  used on input lin
 e 71.
 (pdftex.def)             Requested size: 346.89647pt x 462.52863pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [90 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_M
+ [92 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_M
 Nase_GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>]
-<images/ch_encode_peaks/jund_motif_association.png, id=1642, 1084.05pt x 847.96
+<images/ch_encode_peaks/jund_motif_association.png, id=1635, 1084.05pt x 847.96
 8pt>
 File: images/ch_encode_peaks/jund_motif_association.png Graphic file (type png)
 
 <use images/ch_encode_peaks/jund_motif_association.png>
 Package pdftex.def Info: images/ch_encode_peaks/jund_motif_association.png  use
 d on input line 79.
 (pdftex.def)             Requested size: 433.61232pt x 339.18118pt.
 
 Overfull \hbox (15.92586pt too wide) in paragraph at lines 79--80
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [91 <./images/ch_encode_peaks/ctcf_ndr.png (PNG copy)>]
-<images/ch_encode_peaks/ebf1_haib_3.png, id=1648, 650.43pt x 289.08pt>
+ [93 <./images/ch_encode_peaks/ctcf_ndr.png (PNG copy)>]
+<images/ch_encode_peaks/ebf1_haib_3.png, id=1640, 650.43pt x 289.08pt>
 File: images/ch_encode_peaks/ebf1_haib_3.png Graphic file (type png)
 <use images/ch_encode_peaks/ebf1_haib_3.png>
 Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_3.png  used on input 
 line 87.
 (pdftex.def)             Requested size: 260.16739pt x 115.62994pt.
-<images/ch_encode_peaks/MA0154_3.png, id=1649, 722.7pt x 361.35pt>
+<images/ch_encode_peaks/MA0154_3.png, id=1641, 722.7pt x 361.35pt>
 File: images/ch_encode_peaks/MA0154_3.png Graphic file (type png)
 <use images/ch_encode_peaks/MA0154_3.png>
 Package pdftex.def Info: images/ch_encode_peaks/MA0154_3.png  used on input lin
 e 95.
 (pdftex.def)             Requested size: 361.3491pt x 180.67456pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [92 <./images/ch_encode_peaks/jund_motif_association.png>]
-<images/ch_encode_peaks/ebf1_haib_2.png, id=1659, 650.43pt x 867.24pt>
+ [94 <./images/ch_encode_peaks/jund_motif_association.png>]
+<images/ch_encode_peaks/ebf1_haib_2.png, id=1651, 650.43pt x 867.24pt>
 File: images/ch_encode_peaks/ebf1_haib_2.png Graphic file (type png)
 <use images/ch_encode_peaks/ebf1_haib_2.png>
 Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_2.png  used on input 
 line 103.
 (pdftex.def)             Requested size: 260.16739pt x 346.88986pt.
-<images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png, id=1660, 1300.86pt x 57
+<images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png, id=1652, 1300.86pt x 57
 8.16pt>
 File: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png Graphic file (type 
 png)
 <use images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png>
 Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png 
  used on input line 113.
 (pdftex.def)             Requested size: 390.26102pt x 173.44933pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [93 <./images/ch_encode_peaks/ebf1_haib_3.png (PNG copy)> <./images/ch_encode_
-peaks/MA0154_3.png>] [94 <./images/ch_encode_peaks/ebf1_haib_2.png (PNG copy)>]
-<images/ch_atac-seq/sp1_motifs_6class_shift_flip.png, id=1670, 1300.86pt x 578.
+ [95 <./images/ch_encode_peaks/ebf1_haib_3.png (PNG copy)> <./images/ch_encode_
+peaks/MA0154_3.png>] [96 <./images/ch_encode_peaks/ebf1_haib_2.png (PNG copy)>]
+<images/ch_atac-seq/sp1_motifs_6class_shift_flip.png, id=1662, 1300.86pt x 578.
 16pt>
 File: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png Graphic file (type pn
 g)
 <use images/ch_atac-seq/sp1_motifs_6class_shift_flip.png>
 Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png  u
 sed on input line 121.
 (pdftex.def)             Requested size: 390.26102pt x 173.44933pt.
-<images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png, id=1671, 5
+<images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png, id=1663, 5
 05.89pt x 578.16pt>
 File: images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png Graphi
 c file (type png)
 <use images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png>
 Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_bes
 t_motifs.png  used on input line 129.
 (pdftex.def)             Requested size: 202.3524pt x 231.2599pt.
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [95 <./images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png (PNG copy)> <./im
+ [97 <./images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png (PNG copy)> <./im
 ages/ch_atac-seq/sp1_motifs_6class_shift_flip.png (PNG copy)>]
-<images/ch_atac-seq/sp1_motifs_10class.png, id=1680, 1300.86pt x 867.24pt>
+<images/ch_atac-seq/sp1_motifs_10class.png, id=1673, 1300.86pt x 867.24pt>
 File: images/ch_atac-seq/sp1_motifs_10class.png Graphic file (type png)
 <use images/ch_atac-seq/sp1_motifs_10class.png>
 Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_10class.png  used on inp
 ut line 137.
 (pdftex.def)             Requested size: 455.30783pt x 303.53854pt.
 
 Overfull \hbox (37.62137pt too wide) in paragraph at lines 137--138
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [96 <./images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png (PNG
+ [98 <./images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png (PNG
  copy)>]
 File: images/ch_atac-seq/sp1_motifs_10class.png Graphic file (type png)
 <use images/ch_atac-seq/sp1_motifs_10class.png>
 Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_10class.png  used on inp
 ut line 145.
 (pdftex.def)             Requested size: 455.30783pt x 303.53854pt.
 
 Overfull \hbox (37.62137pt too wide) in paragraph at lines 145--146
  [] 
  []
 
 
 Underfull \vbox (badness 10000) has occurred while \output is active []
 
- [97 <./images/ch_atac-seq/sp1_motifs_10class.png (PNG copy)>]
-<images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png, id=1692, 1300.86p
+ [99 <./images/ch_atac-seq/sp1_motifs_10class.png (PNG copy)>]
+<images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png, id=1684, 1300.86p
 t x 867.24pt>
 File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png Graphic file 
 (type png)
 <use images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png>
 Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas
 s.png  used on input line 153.
 (pdftex.def)             Requested size: 390.26102pt x 260.17401pt.
-<images/ch_atac-seq/data_classPU1_2class.png, id=1693, 1445.4pt x 722.7pt>
+<images/ch_atac-seq/data_classPU1_2class.png, id=1685, 1445.4pt x 722.7pt>
 File: images/ch_atac-seq/data_classPU1_2class.png Graphic file (type png)
 <use images/ch_atac-seq/data_classPU1_2class.png>
 Package pdftex.def Info: images/ch_atac-seq/data_classPU1_2class.png  used on i
 nput line 161.
 (pdftex.def)             Requested size: 433.62335pt x 216.81166pt.
 
 Overfull \hbox (15.93689pt too wide) in paragraph at lines 161--162
  [] 
  []
 
-<images/ch_atac-seq/data_classjun_3class.png, id=1694, 1445.4pt x 722.7pt>
+<images/ch_atac-seq/data_classjun_3class.png, id=1686, 1445.4pt x 722.7pt>
 File: images/ch_atac-seq/data_classjun_3class.png Graphic file (type png)
 <use images/ch_atac-seq/data_classjun_3class.png>
 Package pdftex.def Info: images/ch_atac-seq/data_classjun_3class.png  used on i
 nput line 169.
 (pdftex.def)             Requested size: 433.62335pt x 216.81166pt.
 
 Overfull \hbox (15.93689pt too wide) in paragraph at lines 169--170
  [] 
  []
 
-) [98] [99 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png (PNG 
-copy)>] [100 <./images/ch_atac-seq/data_classPU1_2class.png (PNG copy)>]
-[101 <./images/ch_atac-seq/data_classjun_3class.png (PNG copy)>] [102
+) [100] [101 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png (PN
+G copy)>] [102 <./images/ch_atac-seq/data_classPU1_2class.png (PNG copy)>]
+[103 <./images/ch_atac-seq/data_classjun_3class.png (PNG copy)>] [104
 
 
 ]
 \openout2 = `tail/biblio.aux'.
 
 
-(./tail/biblio.tex (./my_thesis.bbl [103
+(./tail/biblio.tex (./my_thesis.bbl [105
 
 
 
-] [104] [105] [106] [107] [108]
-[109] [110])) [111]
+] [106] [107] [108] [109] [110]
+[111] [112])) [113]
 \openout2 = `tail/cv.aux'.
 
- (./tail/cv.tex [112
+ (./tail/cv.tex [114
 
 
 
 ]
-<tail/cv.pdf, id=1759, 597.23125pt x 845.1575pt>
+<tail/cv.pdf, id=1751, 597.23125pt x 845.1575pt>
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf>
 Package pdftex.def Info: tail/cv.pdf  used on input line 6.
 (pdftex.def)             Requested size: 597.22978pt x 845.15544pt.
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf>
 Package pdftex.def Info: tail/cv.pdf  used on input line 6.
 (pdftex.def)             Requested size: 597.22978pt x 845.15544pt.
-<tail/cv.pdf, id=1762, page=1, 597.23125pt x 845.1575pt>
+<tail/cv.pdf, id=1754, page=1, 597.23125pt x 845.1575pt>
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf, page 1>
 Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.22978pt x 845.15544pt.
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf, page 1>
 Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.25711pt x 845.19412pt.
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf, page 1>
 Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.25711pt x 845.19412pt.
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf, page 1>
 Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.25711pt x 845.19412pt.
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf, page 1>
 Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6.
 (pdftex.def)             Requested size: 597.25711pt x 845.19412pt.
- [113 <./tail/cv.pdf>]
-<tail/cv.pdf, id=1775, page=2, 597.23125pt x 845.1575pt>
+ [115 <./tail/cv.pdf>]
+<tail/cv.pdf, id=1768, page=2, 597.23125pt x 845.1575pt>
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf, page 2>
 Package pdftex.def Info: tail/cv.pdf , page2 used on input line 6.
 (pdftex.def)             Requested size: 597.25711pt x 845.19412pt.
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf, page 2>
 Package pdftex.def Info: tail/cv.pdf , page2 used on input line 6.
 (pdftex.def)             Requested size: 597.25711pt x 845.19412pt.
 File: tail/cv.pdf Graphic file (type pdf)
 <use tail/cv.pdf, page 2>
 Package pdftex.def Info: tail/cv.pdf , page2 used on input line 6.
 (pdftex.def)             Requested size: 597.25711pt x 845.19412pt.
- [114 <./tail/cv.pdf>])
+ [116 <./tail/cv.pdf>])
 Package atveryend Info: Empty hook `BeforeClearDocument' on input line 82.
 Package atveryend Info: Empty hook `AfterLastShipout' on input line 82.
  (./my_thesis.aux (./head/dedication.aux) (./head/acknowledgements.aux)
 (./head/preface.aux) (./head/abstracts.aux) (./main/ch_introduction.aux)
 (./main/ch_group_projects.aux) (./main/ch_encode_peaks.aux)
 (./main/ch_smile-seq.aux) (./main/ch_atac-seq.aux) (./tail/appendix.aux)
 (./tail/biblio.aux) (./tail/cv.aux))
 Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 82.
 Package atveryend Info: Empty hook `AtEndAfterFileList' on input line 82.
 
 
 LaTeX Warning: There were undefined references.
 
 
 LaTeX Warning: There were multiply-defined labels.
 
 Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 82.
  ) 
 Here is how much of TeX's memory you used:
- 27990 strings out of 492982
- 497594 string characters out of 6134895
- 762147 words of memory out of 5000000
- 30090 multiletter control sequences out of 15000+600000
+ 27979 strings out of 492982
+ 497327 string characters out of 6134895
+ 761740 words of memory out of 5000000
+ 30081 multiletter control sequences out of 15000+600000
  146869 words of font info for 340 fonts, out of 8000000 for 9000
  1141 hyphenation exceptions out of 8191
  57i,24n,79p,2621b,1323s stack positions out of 5000i,500n,10000p,200000b,80000s
 {/usr/share/texmf/fonts/enc/dvips/lm/lm-ec.enc}{/usr/share/texlive/texmf-dist
 /fonts/enc/dvips/base/8r.enc}</usr/share/texlive/texmf-dist/fonts/type1/public/
 fourier/fourier-mcl.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/fouri
 er/fourier-mex.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/fourier/fo
 urier-mlit.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/fourier/fourie
 r-ms.pfb></usr/share/texmf/fonts/type1/public/lm/lmss10.pfb></usr/share/texmf/f
 onts/type1/public/lm/lmss17.pfb></usr/share/texmf/fonts/type1/public/lm/lmtt10.
 pfb></usr/share/texlive/texmf-dist/fonts/type1/adobe/utopia/putb8a.pfb></usr/sh
 are/texlive/texmf-dist/fonts/type1/adobe/utopia/putr8a.pfb></usr/share/texlive/
 texmf-dist/fonts/type1/adobe/utopia/putri8a.pfb>
-Output written on my_thesis.pdf (132 pages, 84877008 bytes).
+Output written on my_thesis.pdf (134 pages, 84847998 bytes).
 PDF statistics:
- 2184 PDF objects out of 2487 (max. 8388607)
- 1884 compressed objects within 19 object streams
- 484 named destinations out of 1000 (max. 500000)
- 33741 words of extra memory for PDF output out of 35830 (max. 10000000)
+ 2171 PDF objects out of 2487 (max. 8388607)
+ 1870 compressed objects within 19 object streams
+ 483 named destinations out of 1000 (max. 500000)
+ 33720 words of extra memory for PDF output out of 35830 (max. 10000000)
 
diff --git a/my_thesis.pdf b/my_thesis.pdf
index ca8fb80..9390d98 100644
Binary files a/my_thesis.pdf and b/my_thesis.pdf differ
diff --git a/my_thesis.synctex.gz b/my_thesis.synctex.gz
index c8adb70..eac9120 100644
Binary files a/my_thesis.synctex.gz and b/my_thesis.synctex.gz differ
diff --git a/my_thesis.toc b/my_thesis.toc
index b76bf6a..432470d 100644
--- a/my_thesis.toc
+++ b/my_thesis.toc
@@ -1,97 +1,95 @@
 \babel@toc {english}{}
 \babel@toc {french}{}
 \babel@toc {english}{}
 \contentsline {chapter}{Acknowledgements}{i}{chapter*.1}
 \contentsline {chapter}{Preface}{iii}{chapter*.2}
 \contentsline {chapter}{Abstract (English/Fran\IeC {\c c}ais/Deutsch)}{v}{chapter*.3}
 \babel@toc {german}{}
 \babel@toc {english}{}
 \babel@toc {french}{}
 \babel@toc {english}{}
 \contentsline {chapter}{Introduction}{1}{chapter*.7}
 \contentsline {chapter}{\numberline {1}Published laboratory projects}{3}{chapter.1}
 \contentsline {chapter}{Published laboratory projects}{3}{chapter.1}
 \contentsline {section}{\numberline {1.1}Mass Genome Annotation repository}{3}{section.1.1}
 \contentsline {subsection}{\numberline {1.1.1}Introduction}{3}{subsection.1.1.1}
 \contentsline {subsection}{\numberline {1.1.2}MGA content and organization}{3}{subsection.1.1.2}
 \contentsline {subsection}{\numberline {1.1.3}Conclusions}{5}{subsection.1.1.3}
 \contentsline {section}{\numberline {1.2}Eukaryotic Promoter Database}{6}{section.1.2}
 \contentsline {subsection}{\numberline {1.2.1}Introduction}{7}{subsection.1.2.1}
 \contentsline {subsection}{\numberline {1.2.2}EPDnew now annotates (some of) your mushrooms and vegetables}{7}{subsection.1.2.2}
 \contentsline {subsection}{\numberline {1.2.3}Increased mapping precision in human}{7}{subsection.1.2.3}
 \contentsline {subsection}{\numberline {1.2.4}Integration of EPDnew with other resources}{9}{subsection.1.2.4}
 \contentsline {subsection}{\numberline {1.2.5}Conclusions}{10}{subsection.1.2.5}
 \contentsline {subsection}{\numberline {1.2.6}Methods}{10}{subsection.1.2.6}
 \contentsline {subsubsection}{Motif occurrence profiles}{10}{subsection.1.2.6}
 \contentsline {section}{\numberline {1.3}PWMScan}{11}{section.1.3}
 \contentsline {subsection}{\numberline {1.3.1}Introduction}{11}{subsection.1.3.1}
 \contentsline {subsection}{\numberline {1.3.2}Data and methods}{13}{subsection.1.3.2}
 \contentsline {subsection}{\numberline {1.3.3}Benchmark}{14}{subsection.1.3.3}
 \contentsline {subsection}{\numberline {1.3.4}Conclusions}{16}{subsection.1.3.4}
 \contentsline {section}{\numberline {1.4}SPar-K}{17}{section.1.4}
 \contentsline {subsection}{\numberline {1.4.1}Introduction}{17}{subsection.1.4.1}
 \contentsline {subsection}{\numberline {1.4.2}Methods}{17}{subsection.1.4.2}
 \contentsline {subsection}{\numberline {1.4.3}Results}{21}{subsection.1.4.3}
 \contentsline {subsection}{\numberline {1.4.4}Conclusion}{21}{subsection.1.4.4}
 \contentsline {chapter}{\numberline {2}ENCODE peaks analysis}{23}{chapter.2}
 \contentsline {chapter}{ENCODE peaks analysis}{23}{chapter.2}
 \contentsline {section}{\numberline {2.1}Data}{23}{section.2.1}
 \contentsline {section}{\numberline {2.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{25}{section.2.2}
 \contentsline {subsection}{\numberline {2.2.1}Data realignment}{26}{subsection.2.2.1}
 \contentsline {section}{\numberline {2.3}Nucleosome organization around transcription factor binding sites}{27}{section.2.3}
 \contentsline {section}{\numberline {2.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{29}{section.2.4}
 \contentsline {section}{\numberline {2.5}CTCF and JunD interactomes}{33}{section.2.5}
 \contentsline {section}{\numberline {2.6}EBF1 binds nucleosomes}{38}{section.2.6}
 \contentsline {section}{\numberline {2.7}Methods}{40}{section.2.7}
 \contentsline {subsection}{\numberline {2.7.1}Data and data processing}{40}{subsection.2.7.1}
 \contentsline {subsection}{\numberline {2.7.2}Classification of MNase patterns}{41}{subsection.2.7.2}
 \contentsline {subsection}{\numberline {2.7.3}Quantifying nucleosome array intensity from classification results}{42}{subsection.2.7.3}
 \contentsline {subsection}{\numberline {2.7.4}Peak colocalization}{43}{subsection.2.7.4}
 \contentsline {subsection}{\numberline {2.7.5}NDR detection}{43}{subsection.2.7.5}
 \contentsline {subsection}{\numberline {2.7.6}CTCF and JunD interactors}{46}{subsection.2.7.6}
 \contentsline {subsection}{\numberline {2.7.7}EBF1 and nucleosome}{47}{subsection.2.7.7}
 \contentsline {chapter}{\numberline {3}SMiLE-seq data analysis}{49}{chapter.3}
 \contentsline {chapter}{SMiLE-seq data analysis}{49}{chapter.3}
 \contentsline {subsection}{\numberline {3.0.1}Introduction}{49}{subsection.3.0.1}
 \contentsline {subsection}{\numberline {3.0.2}Hidden Markov Model Motif discovery}{51}{subsection.3.0.2}
 \contentsline {subsection}{\numberline {3.0.3}Binding motif evaluation}{52}{subsection.3.0.3}
 \contentsline {subsection}{\numberline {3.0.4}Results}{54}{subsection.3.0.4}
 \contentsline {subsection}{\numberline {3.0.5}Conclusions}{56}{subsection.3.0.5}
 \contentsline {chapter}{\numberline {4}Chromatin accessibility of monocytes}{57}{chapter.4}
 \contentsline {section}{\numberline {4.1}ATAC-seq}{57}{section.4.1}
 \contentsline {section}{\numberline {4.2}Monitoring TF binding}{59}{section.4.2}
 \contentsline {section}{\numberline {4.3}The advent of single cell DGF}{60}{section.4.3}
-\contentsline {section}{\numberline {4.4}A quick overview of scATAC-seq data analysis}{60}{section.4.4}
-\contentsline {section}{\numberline {4.5}Open questions}{60}{section.4.5}
-\contentsline {section}{\numberline {4.6}Data}{62}{section.4.6}
-\contentsline {section}{\numberline {4.7}Identification of catalog of chromatin architectures}{62}{section.4.7}
-\contentsline {subsection}{\numberline {4.7.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{63}{subsection.4.7.1}
-\contentsline {subsection}{\numberline {4.7.2}EMSequence : an algorithm to identify over-represented sequences}{64}{subsection.4.7.2}
-\contentsline {subsubsection}{without shift and flip}{64}{subsection.4.7.2}
-\contentsline {subsubsection}{with shift and flip}{65}{equation.4.7.2}
-\contentsline {subsection}{\numberline {4.7.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{66}{subsection.4.7.3}
-\contentsline {subsection}{\numberline {4.7.4}Data realignment}{67}{subsection.4.7.4}
-\contentsline {section}{\numberline {4.8}Results}{68}{section.4.8}
-\contentsline {subsection}{\numberline {4.8.1}Fragment size analysis}{68}{subsection.4.8.1}
-\contentsline {subsection}{\numberline {4.8.2}Measuring open chromatin and nucleosome occupancy}{70}{subsection.4.8.2}
-\contentsline {subsection}{\numberline {4.8.3}Evaluation of EMSequence and ChIPPartitioning}{72}{subsection.4.8.3}
-\contentsline {subsubsection}{EMSequence}{72}{subsection.4.8.3}
-\contentsline {subsubsection}{ChIPPartitioning}{75}{figure.caption.38}
-\contentsline {section}{\numberline {4.9}Aligning the binding sites}{77}{section.4.9}
-\contentsline {section}{\numberline {4.10}Exploring individual TF classes}{79}{section.4.10}
-\contentsline {section}{\numberline {4.11}Discussions}{81}{section.4.11}
-\contentsline {section}{\numberline {4.12}Perspectives}{81}{section.4.12}
-\contentsline {section}{\numberline {4.13}Methods}{82}{section.4.13}
-\contentsline {subsection}{\numberline {4.13.1}Implementations}{82}{subsection.4.13.1}
-\contentsline {subsection}{\numberline {4.13.2}Fragment classes}{82}{subsection.4.13.2}
-\contentsline {subsection}{\numberline {4.13.3}Simulated sequences}{83}{subsection.4.13.3}
-\contentsline {subsection}{\numberline {4.13.4}Realignment using JASPAR motifs}{83}{subsection.4.13.4}
-\contentsline {subsection}{\numberline {4.13.5}Display of motif logo}{83}{subsection.4.13.5}
-\contentsline {subsection}{\numberline {4.13.6}Model extension}{83}{subsection.4.13.6}
-\contentsline {subsection}{\numberline {4.13.7}Extracting data assigned to a class}{83}{subsection.4.13.7}
-\contentsline {chapter}{\numberline {A}An appendix}{85}{appendix.A}
-\contentsline {section}{\numberline {A.1}Supplementary figures}{85}{section.A.1}
+\contentsline {section}{\numberline {4.4}Open issues}{60}{section.4.4}
+\contentsline {section}{\numberline {4.5}Data}{60}{section.4.5}
+\contentsline {section}{\numberline {4.6}Identifying over-represented signals}{61}{section.4.6}
+\contentsline {subsection}{\numberline {4.6.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{61}{subsection.4.6.1}
+\contentsline {subsection}{\numberline {4.6.2}EMSequence : an algorithm to identify over-represented sequences}{61}{subsection.4.6.2}
+\contentsline {subsubsection}{without shift and flip}{63}{figure.caption.32}
+\contentsline {subsubsection}{with shift and flip}{63}{equation.4.6.2}
+\contentsline {subsection}{\numberline {4.6.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{65}{subsection.4.6.3}
+\contentsline {subsection}{\numberline {4.6.4}Data realignment}{66}{subsection.4.6.4}
+\contentsline {section}{\numberline {4.7}Results}{66}{section.4.7}
+\contentsline {subsection}{\numberline {4.7.1}Fragment size analysis}{66}{subsection.4.7.1}
+\contentsline {subsection}{\numberline {4.7.2}Measuring open chromatin and nucleosome occupancy}{69}{subsection.4.7.2}
+\contentsline {subsection}{\numberline {4.7.3}Evaluation of EMSequence and ChIPPartitioning}{71}{subsection.4.7.3}
+\contentsline {subsubsection}{EMSequence}{71}{subsection.4.7.3}
+\contentsline {subsubsection}{ChIPPartitioning}{74}{figure.caption.37}
+\contentsline {section}{\numberline {4.8}Aligning the binding sites}{76}{section.4.8}
+\contentsline {section}{\numberline {4.9}Exploring individual TF classes}{79}{section.4.9}
+\contentsline {section}{\numberline {4.10}Discussions}{80}{section.4.10}
+\contentsline {section}{\numberline {4.11}Perspectives}{80}{section.4.11}
+\contentsline {section}{\numberline {4.12}Methods}{81}{section.4.12}
+\contentsline {subsection}{\numberline {4.12.1}Implementations}{81}{subsection.4.12.1}
+\contentsline {subsection}{\numberline {4.12.2}Fragment classes}{81}{subsection.4.12.2}
+\contentsline {subsection}{\numberline {4.12.3}Simulated sequences}{81}{subsection.4.12.3}
+\contentsline {subsection}{\numberline {4.12.4}Realignment using JASPAR motifs}{82}{subsection.4.12.4}
+\contentsline {subsection}{\numberline {4.12.5}Model extension}{82}{subsection.4.12.5}
+\contentsline {subsection}{\numberline {4.12.6}Extracting data assigned to a class}{82}{subsection.4.12.6}
 \vspace {\normalbaselineskip }
-\contentsline {chapter}{Bibliography}{103}{section*.62}
-\contentsline {chapter}{Bibliography}{111}{appendix*.63}
-\contentsline {chapter}{Curriculum Vitae}{113}{section*.64}
+\contentsline {chapter}{\numberline {A}An appendix}{87}{appendix.A}
+\contentsline {section}{\numberline {A.1}Supplementary figures}{87}{section.A.1}
+\contentsline {chapter}{Bibliography}{105}{section*.62}
+\contentsline {chapter}{Bibliography}{113}{appendix*.63}
+\contentsline {chapter}{Curriculum Vitae}{115}{section*.64}
diff --git a/tail/appendix.aux b/tail/appendix.aux
index de872b2..f64b308 100644
--- a/tail/appendix.aux
+++ b/tail/appendix.aux
@@ -1,97 +1,97 @@
 \relax 
 \providecommand\hyper@newdestlabel[2]{}
 \citation{jolma_dna-binding_2013}
 \citation{jolma_dna-binding_2013}
-\@writefile{toc}{\contentsline {chapter}{\numberline {A}An appendix}{85}{appendix.A}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {A}An appendix}{87}{appendix.A}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
 \@writefile{loa}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {section}{\numberline {A.1}Supplementary figures}{85}{section.A.1}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.1}{\ignorespaces \textbf  {Predictive power of SMiLE-seq :} \textbf  {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite  {jolma_dna-binding_2013}) are also displayed. \textbf  {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }}{85}{figure.caption.43}}
-\newlabel{suppl_smileseq_auc_2}{{A.1}{85}{\textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }{figure.caption.43}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.2}{\ignorespaces \textbf  {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{86}{figure.caption.44}}
-\newlabel{suppl_encode_peaks_em_ctcf}{{A.2}{86}{\textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.44}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.3}{\ignorespaces \textbf  {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{87}{figure.caption.45}}
-\newlabel{suppl_encode_peaks_em_nrf1}{{A.3}{87}{\textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.45}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.4}{\ignorespaces \textbf  {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{88}{figure.caption.46}}
-\newlabel{suppl_encode_peaks_em_cfos}{{A.4}{88}{\textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.46}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.5}{\ignorespaces \textbf  {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{89}{figure.caption.47}}
-\newlabel{suppl_encode_peaks_em_max}{{A.5}{89}{\textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.47}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.6}{\ignorespaces \textbf  {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{90}{figure.caption.48}}
-\newlabel{suppl_encode_peaks_em_brca1}{{A.6}{90}{\textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.48}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.7}{\ignorespaces \textbf  {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }}{91}{figure.caption.49}}
-\newlabel{suppl_encode_peaks_ctcf_ndr}{{A.7}{91}{\textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }{figure.caption.49}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {A.1}Supplementary figures}{87}{section.A.1}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.1}{\ignorespaces \textbf  {Predictive power of SMiLE-seq :} \textbf  {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite  {jolma_dna-binding_2013}) are also displayed. \textbf  {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }}{87}{figure.caption.43}}
+\newlabel{suppl_smileseq_auc_2}{{A.1}{87}{\textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }{figure.caption.43}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.2}{\ignorespaces \textbf  {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{88}{figure.caption.44}}
+\newlabel{suppl_encode_peaks_em_ctcf}{{A.2}{88}{\textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.44}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.3}{\ignorespaces \textbf  {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{89}{figure.caption.45}}
+\newlabel{suppl_encode_peaks_em_nrf1}{{A.3}{89}{\textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.45}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.4}{\ignorespaces \textbf  {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{90}{figure.caption.46}}
+\newlabel{suppl_encode_peaks_em_cfos}{{A.4}{90}{\textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.46}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.5}{\ignorespaces \textbf  {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{91}{figure.caption.47}}
+\newlabel{suppl_encode_peaks_em_max}{{A.5}{91}{\textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.47}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.6}{\ignorespaces \textbf  {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{92}{figure.caption.48}}
+\newlabel{suppl_encode_peaks_em_brca1}{{A.6}{92}{\textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.48}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.7}{\ignorespaces \textbf  {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }}{93}{figure.caption.49}}
+\newlabel{suppl_encode_peaks_ctcf_ndr}{{A.7}{93}{\textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }{figure.caption.49}{}}
 \citation{khan_jaspar_2018}
 \citation{khan_jaspar_2018}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.8}{\ignorespaces \textbf  {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf  {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf  {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf  {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }}{92}{figure.caption.50}}
-\newlabel{suppl_encode_peaks_jund_association}{{A.8}{92}{\textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }{figure.caption.50}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.9}{\ignorespaces \textbf  {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{93}{figure.caption.51}}
-\newlabel{suppl_encode_peaks_ebf1_nucl}{{A.9}{93}{\textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.51}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.10}{\ignorespaces \textbf  {EBF1 logo} from JASPAR binding model MA0154.3 \citep  {khan_jaspar_2018}.\relax }}{93}{figure.caption.52}}
-\newlabel{suppl_encode_peaks_ebf1_logo}{{A.10}{93}{\textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }{figure.caption.52}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.11}{\ignorespaces \textbf  {EBF1 binding sites} chromatin features. \textbf  {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf  {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf  {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }}{94}{figure.caption.53}}
-\newlabel{suppl_encode_peaks_ebf1_chrom}{{A.11}{94}{\textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }{figure.caption.53}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.8}{\ignorespaces \textbf  {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf  {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf  {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf  {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }}{94}{figure.caption.50}}
+\newlabel{suppl_encode_peaks_jund_association}{{A.8}{94}{\textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }{figure.caption.50}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.9}{\ignorespaces \textbf  {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{95}{figure.caption.51}}
+\newlabel{suppl_encode_peaks_ebf1_nucl}{{A.9}{95}{\textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.51}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.10}{\ignorespaces \textbf  {EBF1 logo} from JASPAR binding model MA0154.3 \citep  {khan_jaspar_2018}.\relax }}{95}{figure.caption.52}}
+\newlabel{suppl_encode_peaks_ebf1_logo}{{A.10}{95}{\textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }{figure.caption.52}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.11}{\ignorespaces \textbf  {EBF1 binding sites} chromatin features. \textbf  {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf  {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf  {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }}{96}{figure.caption.53}}
+\newlabel{suppl_encode_peaks_ebf1_chrom}{{A.11}{96}{\textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }{figure.caption.53}{}}
 \citation{ou_motifstack_2018}
 \citation{ou_motifstack_2018}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.12}{\ignorespaces \textbf  {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{95}{figure.caption.54}}
-\newlabel{suppl_emread_sp1_noshift_flip}{{A.12}{95}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.54}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.13}{\ignorespaces \textbf  {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{95}{figure.caption.55}}
-\newlabel{suppl_emread_sp1_shift_flip}{{A.13}{95}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.55}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.14}{\ignorespaces \textbf  {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep  {ou_motifstack_2018}.\relax }}{96}{figure.caption.56}}
-\newlabel{suppl_atac_seq_emseq_best_motifs}{{A.14}{96}{\textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }{figure.caption.56}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.15}{\ignorespaces \textbf  {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{97}{figure.caption.57}}
-\newlabel{suppl_emseq_sp1_10class}{{A.15}{97}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.57}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.16}{\ignorespaces \textbf  {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{98}{figure.caption.58}}
-\newlabel{suppl_emseq_sp1_10class}{{A.16}{98}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.58}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.17}{\ignorespaces \textbf  {Extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{99}{figure.caption.59}}
-\newlabel{suppl_atac_seq_23class}{{A.17}{99}{\textbf {Extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.59}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.18}{\ignorespaces \textbf  {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{100}{figure.caption.60}}
-\newlabel{suppl_atac_seq_pu1_subclass}{{A.18}{100}{\textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.60}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {A.19}{\ignorespaces \textbf  {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{101}{figure.caption.61}}
-\newlabel{suppl_atac_seq_ap1_subclass}{{A.19}{101}{\textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.61}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.12}{\ignorespaces \textbf  {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{97}{figure.caption.54}}
+\newlabel{suppl_emread_sp1_noshift_flip}{{A.12}{97}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.54}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.13}{\ignorespaces \textbf  {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{97}{figure.caption.55}}
+\newlabel{suppl_emread_sp1_shift_flip}{{A.13}{97}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.55}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.14}{\ignorespaces \textbf  {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep  {ou_motifstack_2018}.\relax }}{98}{figure.caption.56}}
+\newlabel{suppl_atac_seq_emseq_best_motifs}{{A.14}{98}{\textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }{figure.caption.56}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.15}{\ignorespaces \textbf  {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{99}{figure.caption.57}}
+\newlabel{suppl_emseq_sp1_10class}{{A.15}{99}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.57}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.16}{\ignorespaces \textbf  {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{100}{figure.caption.58}}
+\newlabel{suppl_emseq_sp1_10class}{{A.16}{100}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.58}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.17}{\ignorespaces \textbf  {Extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{101}{figure.caption.59}}
+\newlabel{suppl_atac_seq_23class}{{A.17}{101}{\textbf {Extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.59}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.18}{\ignorespaces \textbf  {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{102}{figure.caption.60}}
+\newlabel{suppl_atac_seq_pu1_subclass}{{A.18}{102}{\textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.60}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {A.19}{\ignorespaces \textbf  {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{103}{figure.caption.61}}
+\newlabel{suppl_atac_seq_ap1_subclass}{{A.19}{103}{\textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.61}{}}
 \@setckpt{tail/appendix}{
-\setcounter{page}{102}
+\setcounter{page}{104}
 \setcounter{equation}{0}
 \setcounter{enumi}{13}
 \setcounter{enumii}{0}
 \setcounter{enumiii}{0}
 \setcounter{enumiv}{0}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{0}
 \setcounter{chapter}{1}
 \setcounter{section}{1}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{paragraph}{0}
 \setcounter{subparagraph}{0}
 \setcounter{figure}{19}
 \setcounter{table}{0}
 \setcounter{NAT@ctr}{0}
 \setcounter{FBcaption@count}{0}
 \setcounter{ContinuedFloat}{0}
 \setcounter{KVtest}{0}
 \setcounter{subfigure}{0}
 \setcounter{subfigure@save}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
 \setcounter{subtable@save}{0}
 \setcounter{lotdepth}{1}
 \setcounter{lips@count}{2}
 \setcounter{lstnumber}{1}
 \setcounter{Item}{13}
 \setcounter{Hfootnote}{0}
 \setcounter{bookmark@seq@number}{0}
 \setcounter{AM@survey}{0}
 \setcounter{ttlp@side}{0}
 \setcounter{myparts}{0}
 \setcounter{parentequation}{0}
 \setcounter{AlgoLine}{39}
 \setcounter{algocfline}{3}
 \setcounter{algocfproc}{3}
 \setcounter{algocf}{3}
 \setcounter{float@type}{8}
 \setcounter{nlinenum}{0}
 \setcounter{lstlisting}{0}
 \setcounter{section@level}{0}
 }
diff --git a/tail/biblio.aux b/tail/biblio.aux
index 38a0ea3..51bc313 100644
--- a/tail/biblio.aux
+++ b/tail/biblio.aux
@@ -1,150 +1,149 @@
 \relax 
 \providecommand\hyper@newdestlabel[2]{}
 \bibstyle{apalike}
 \bibdata{tail/bibliography}
 \bibcite{adey_rapid_2010}{{1}{2010}{{Adey et~al.}}{{}}}
 \bibcite{aerts_toucan:_2003}{{2}{2003}{{Aerts et~al.}}{{}}}
 \bibcite{aibar_scenic:_2017}{{3}{2017}{{Aibar et~al.}}{{}}}
 \bibcite{alipanahi_predicting_2015}{{4}{2015}{{Alipanahi et~al.}}{{}}}
 \bibcite{ambrosini_chip-seq_2016}{{5}{2016a}{{Ambrosini et~al.}}{{}}}
 \bibcite{ambrosini_chip-seq_2016-1}{{6}{2016b}{{Ambrosini et~al.}}{{}}}
 \bibcite{ambrosini_pwmscan:_2018}{{7}{2018}{{Ambrosini et~al.}}{{}}}
 \bibcite{ambrosini_signal_2003}{{8}{2003}{{Ambrosini et~al.}}{{}}}
 \bibcite{angerer_single_2017}{{9}{2017}{{Angerer et~al.}}{{}}}
 \bibcite{bailey_znf143_2015}{{10}{2015}{{Bailey et~al.}}{{}}}
 \bibcite{bailey_meme_2009}{{11}{2009}{{Bailey et~al.}}{{}}}
-\@writefile{toc}{\contentsline {chapter}{Bibliography}{103}{section*.62}}
+\@writefile{toc}{\contentsline {chapter}{Bibliography}{105}{section*.62}}
 \bibcite{barrett_ncbi_2011}{{12}{2011}{{Barrett et~al.}}{{}}}
 \bibcite{barski_high-resolution_2007}{{13}{2007}{{Barski et~al.}}{{}}}
 \bibcite{beckstette_fast_2006}{{14}{2006}{{Beckstette et~al.}}{{}}}
 \bibcite{berest_quantification_2018}{{15}{2018}{{Berest et~al.}}{{}}}
 \bibcite{berger_universal_2009}{{16}{2009}{{Berger and Bulyk}}{{}}}
 \bibcite{boller_defining_2018}{{17}{2018}{{Boller et~al.}}{{}}}
 \bibcite{boller_pioneering_2016}{{18}{2016}{{Boller et~al.}}{{}}}
 \bibcite{boyle_high-resolution_2008}{{19}{2008}{{Boyle et~al.}}{{}}}
 \bibcite{bucher_compilation_1986}{{20}{1986}{{Bucher and Trifonov}}{{}}}
 \bibcite{buenrostro_transposition_2013}{{21}{2013}{{Buenrostro et~al.}}{{}}}
 \bibcite{castro-mondragon_rsat_2017}{{22}{2017}{{Castro-Mondragon et~al.}}{{}}}
 \bibcite{chatr-aryamontri_biogrid_2017}{{23}{2017}{{Chatr-aryamontri et~al.}}{{}}}
 \bibcite{cheng_understanding_2012}{{24}{2012}{{Cheng et~al.}}{{}}}
 \bibcite{cirillo_opening_2002}{{25}{2002}{{Cirillo et~al.}}{{}}}
 \bibcite{consortium_integrated_2012}{{26}{2012}{{Consortium}}{{}}}
 \bibcite{dalton_clustering_2009}{{27}{2009}{{Dalton et~al.}}{{}}}
 \bibcite{donohoe_identification_2007}{{28}{2007}{{Donohoe et~al.}}{{}}}
 \bibcite{dreos_epd_2013}{{29}{2013}{{Dreos et~al.}}{{}}}
 \bibcite{dreos_eukaryotic_2017}{{30}{2017}{{Dreos et~al.}}{{}}}
 \bibcite{dreos_mga_2018}{{31}{2018}{{Dreos et~al.}}{{}}}
 \bibcite{dreos_eukaryotic_2015}{{32}{2015}{{Dreos et~al.}}{{}}}
 \bibcite{fan_characterizing_2016}{{33}{2016}{{Fan et~al.}}{{}}}
 \bibcite{fu_motifviz:_2004}{{34}{2004}{{Fu et~al.}}{{}}}
 \bibcite{fu_insulator_2008}{{35}{2008}{{Fu et~al.}}{{}}}
 \bibcite{gaffney_controls_2012}{{36}{2012}{{Gaffney et~al.}}{{}}}
 \bibcite{gerstein_architecture_2012}{{37}{2012}{{Gerstein et~al.}}{{}}}
 \bibcite{ghirlando_ctcf:_2016}{{38}{2016}{{Ghirlando and Felsenfeld}}{{}}}
 \bibcite{gonzalez-blas_cistopic:_2019}{{39}{2019}{{Gonz\IeC {\'a}lez-Blas et~al.}}{{}}}
 \bibcite{grant_fimo:_2011}{{40}{2011}{{Grant et~al.}}{{}}}
 \bibcite{grossman_positional_2018}{{41}{2018}{{Grossman et~al.}}{{}}}
 \bibcite{groux_spar-k:_2019}{{42}{2019}{{Groux and Bucher}}{{}}}
 \bibcite{guo_high_2012}{{43}{2012}{{Guo et~al.}}{{}}}
 \bibcite{hagman_early_2005}{{44}{2005}{{Hagman and Lukin}}{{}}}
 \bibcite{heinz_simple_2010}{{45}{2010}{{Heinz et~al.}}{{}}}
-\bibcite{hepler_10x_2018}{{46}{2018}{{Hepler}}{{}}}
-\bibcite{hertz_identification_1990}{{47}{1990}{{Hertz et~al.}}{{}}}
-\bibcite{hon_chromasig:_2008}{{48}{2008}{{Hon et~al.}}{{}}}
-\bibcite{ioshikhes_variety_2011}{{49}{2011}{{Ioshikhes et~al.}}{{}}}
-\bibcite{isakova_smile-seq_2017}{{50}{2017}{{Isakova et~al.}}{{}}}
-\bibcite{jolma_multiplexed_2010}{{51}{2010}{{Jolma et~al.}}{{}}}
-\bibcite{jolma_dna-binding_2013}{{52}{2013}{{Jolma et~al.}}{{}}}
-\bibcite{kent_blatblast-like_2002}{{53}{2002}{{Kent}}{{}}}
-\bibcite{khan_jaspar_2018}{{54}{2018}{{Khan et~al.}}{{}}}
-\bibcite{kiselev_sc3:_2017}{{55}{2017}{{Kiselev et~al.}}{{}}}
-\bibcite{kulakovskiy_hocomoco:_2018}{{56}{2018}{{Kulakovskiy et~al.}}{{}}}
-\bibcite{kulakovskiy_hocomoco:_2016}{{57}{2016}{{Kulakovskiy et~al.}}{{}}}
-\bibcite{kundaje_ubiquitous_2012}{{58}{2012}{{Kundaje et~al.}}{{}}}
-\bibcite{kurotaki_transcriptional_2017}{{59}{2017}{{Kurotaki et~al.}}{{}}}
-\bibcite{langmead_fast_2012}{{60}{2012}{{Langmead and Salzberg}}{{}}}
-\bibcite{langmead_ultrafast_2009}{{61}{2009}{{Langmead et~al.}}{{}}}
-\bibcite{li_sequence_2009}{{62}{2009}{{Li et~al.}}{{}}}
-\bibcite{li_identification_2019}{{63}{2019}{{Li et~al.}}{{}}}
-\bibcite{lizio_gateways_2015}{{64}{2015}{{Lizio et~al.}}{{}}}
-\bibcite{losada_cohesin_2014}{{65}{2014}{{Losada}}{{}}}
-\bibcite{maerkl_systems_2007}{{66}{2007}{{Maerkl and Quake}}{{}}}
-\bibcite{maier_early_2004}{{67}{2004}{{Maier et~al.}}{{}}}
-\bibcite{marsland_machine_2015-1}{{68}{2015}{{Marsland}}{{}}}
-\bibcite{mathelier_jaspar_2014}{{69}{2014}{{Mathelier et~al.}}{{}}}
-\bibcite{nair_probabilistic_2014}{{70}{2014}{{Nair et~al.}}{{}}}
-\bibcite{neph_expansive_2012}{{71}{2012}{{Neph et~al.}}{{}}}
-\bibcite{nielsen_catchprofiles:_2012}{{72}{2012}{{Nielsen et~al.}}{{}}}
-\bibcite{ong_ctcf:_2014}{{73}{2014}{{Ong and Corces}}{{}}}
-\bibcite{orenstein_comparative_2014}{{74}{2014}{{Orenstein and Shamir}}{{}}}
-\bibcite{ou_motifstack_2018}{{75}{2018}{{Ou et~al.}}{{}}}
-\bibcite{pizzi_fast_2008}{{76}{2008}{{Pizzi and Ukkonen}}{{}}}
-\bibcite{pollard_detection_2010}{{77}{2010}{{Pollard et~al.}}{{}}}
-\bibcite{quinlan_bedtools:_2010}{{78}{2010}{{Quinlan and Hall}}{{}}}
-\bibcite{raney_track_2014}{{79}{2014}{{Raney et~al.}}{{}}}
-\bibcite{rico_comparative_2017}{{80}{2017}{{Rico et~al.}}{{}}}
-\bibcite{roadmap_epigenomics_consortium_integrative_2015}{{81}{2015}{{{Roadmap Epigenomics Consortium} et~al.}}{{}}}
-\bibcite{rustici_arrayexpress_2013}{{82}{2013}{{Rustici et~al.}}{{}}}
-\bibcite{schones_statistical_2007}{{83}{2007}{{Schones et~al.}}{{}}}
-\bibcite{schutz_mamot:_2008}{{84}{2008}{{Sch\IeC {\"u}tz and Delorenzi}}{{}}}
-\bibcite{siepel_evolutionarily_2005}{{85}{2005}{{Siepel et~al.}}{{}}}
-\bibcite{soufi_pioneer_2015}{{86}{2015}{{Soufi et~al.}}{{}}}
-\bibcite{stedman_cohesins_2008}{{87}{2008}{{Stedman et~al.}}{{}}}
-\bibcite{trifonov_cracking_2011}{{88}{2011}{{Trifonov}}{{}}}
-\bibcite{turatsinze_using_2008}{{89}{2008}{{Turatsinze et~al.}}{{}}}
-\bibcite{vierstra_genomic_2016}{{90}{2016}{{Vierstra and Stamatoyannopoulos}}{{}}}
-\bibcite{voss_dynamic_2014}{{91}{2014}{{Voss and Hager}}{{}}}
-\bibcite{wang_sequence_2012}{{92}{2012}{{Wang et~al.}}{{}}}
-\bibcite{weirauch_evaluation_2013}{{93}{2013}{{Weirauch et~al.}}{{}}}
-\bibcite{wu_biogps:_2016}{{94}{2016}{{Wu et~al.}}{{}}}
-\bibcite{zaret_pioneer_2011}{{95}{2011}{{Zaret and Carroll}}{{}}}
-\bibcite{zhang_canonical_2014}{{96}{2014}{{Zhang et~al.}}{{}}}
-\bibcite{zhao_tred:_2005}{{97}{2005}{{Zhao et~al.}}{{}}}
-\bibcite{zhao_inferring_2009}{{98}{2009}{{Zhao et~al.}}{{}}}
-\@writefile{toc}{\contentsline {chapter}{Bibliography}{111}{appendix*.63}}
+\bibcite{hertz_identification_1990}{{46}{1990}{{Hertz et~al.}}{{}}}
+\bibcite{hon_chromasig:_2008}{{47}{2008}{{Hon et~al.}}{{}}}
+\bibcite{ioshikhes_variety_2011}{{48}{2011}{{Ioshikhes et~al.}}{{}}}
+\bibcite{isakova_smile-seq_2017}{{49}{2017}{{Isakova et~al.}}{{}}}
+\bibcite{jolma_multiplexed_2010}{{50}{2010}{{Jolma et~al.}}{{}}}
+\bibcite{jolma_dna-binding_2013}{{51}{2013}{{Jolma et~al.}}{{}}}
+\bibcite{kent_blatblast-like_2002}{{52}{2002}{{Kent}}{{}}}
+\bibcite{khan_jaspar_2018}{{53}{2018}{{Khan et~al.}}{{}}}
+\bibcite{kiselev_sc3:_2017}{{54}{2017}{{Kiselev et~al.}}{{}}}
+\bibcite{kulakovskiy_hocomoco:_2018}{{55}{2018}{{Kulakovskiy et~al.}}{{}}}
+\bibcite{kulakovskiy_hocomoco:_2016}{{56}{2016}{{Kulakovskiy et~al.}}{{}}}
+\bibcite{kundaje_ubiquitous_2012}{{57}{2012}{{Kundaje et~al.}}{{}}}
+\bibcite{kurotaki_transcriptional_2017}{{58}{2017}{{Kurotaki et~al.}}{{}}}
+\bibcite{langmead_fast_2012}{{59}{2012}{{Langmead and Salzberg}}{{}}}
+\bibcite{langmead_ultrafast_2009}{{60}{2009}{{Langmead et~al.}}{{}}}
+\bibcite{li_sequence_2009}{{61}{2009}{{Li et~al.}}{{}}}
+\bibcite{li_identification_2019}{{62}{2019}{{Li et~al.}}{{}}}
+\bibcite{lizio_gateways_2015}{{63}{2015}{{Lizio et~al.}}{{}}}
+\bibcite{losada_cohesin_2014}{{64}{2014}{{Losada}}{{}}}
+\bibcite{maerkl_systems_2007}{{65}{2007}{{Maerkl and Quake}}{{}}}
+\bibcite{maier_early_2004}{{66}{2004}{{Maier et~al.}}{{}}}
+\bibcite{marsland_machine_2015-1}{{67}{2015}{{Marsland}}{{}}}
+\bibcite{mathelier_jaspar_2014}{{68}{2014}{{Mathelier et~al.}}{{}}}
+\bibcite{nair_probabilistic_2014}{{69}{2014}{{Nair et~al.}}{{}}}
+\bibcite{neph_expansive_2012}{{70}{2012}{{Neph et~al.}}{{}}}
+\bibcite{nielsen_catchprofiles:_2012}{{71}{2012}{{Nielsen et~al.}}{{}}}
+\bibcite{ong_ctcf:_2014}{{72}{2014}{{Ong and Corces}}{{}}}
+\bibcite{orenstein_comparative_2014}{{73}{2014}{{Orenstein and Shamir}}{{}}}
+\bibcite{ou_motifstack_2018}{{74}{2018}{{Ou et~al.}}{{}}}
+\bibcite{pizzi_fast_2008}{{75}{2008}{{Pizzi and Ukkonen}}{{}}}
+\bibcite{pollard_detection_2010}{{76}{2010}{{Pollard et~al.}}{{}}}
+\bibcite{quinlan_bedtools:_2010}{{77}{2010}{{Quinlan and Hall}}{{}}}
+\bibcite{raney_track_2014}{{78}{2014}{{Raney et~al.}}{{}}}
+\bibcite{rico_comparative_2017}{{79}{2017}{{Rico et~al.}}{{}}}
+\bibcite{roadmap_epigenomics_consortium_integrative_2015}{{80}{2015}{{{Roadmap Epigenomics Consortium} et~al.}}{{}}}
+\bibcite{rustici_arrayexpress_2013}{{81}{2013}{{Rustici et~al.}}{{}}}
+\bibcite{schones_statistical_2007}{{82}{2007}{{Schones et~al.}}{{}}}
+\bibcite{schutz_mamot:_2008}{{83}{2008}{{Sch\IeC {\"u}tz and Delorenzi}}{{}}}
+\bibcite{siepel_evolutionarily_2005}{{84}{2005}{{Siepel et~al.}}{{}}}
+\bibcite{soufi_pioneer_2015}{{85}{2015}{{Soufi et~al.}}{{}}}
+\bibcite{stedman_cohesins_2008}{{86}{2008}{{Stedman et~al.}}{{}}}
+\bibcite{trifonov_cracking_2011}{{87}{2011}{{Trifonov}}{{}}}
+\bibcite{turatsinze_using_2008}{{88}{2008}{{Turatsinze et~al.}}{{}}}
+\bibcite{vierstra_genomic_2016}{{89}{2016}{{Vierstra and Stamatoyannopoulos}}{{}}}
+\bibcite{voss_dynamic_2014}{{90}{2014}{{Voss and Hager}}{{}}}
+\bibcite{wang_sequence_2012}{{91}{2012}{{Wang et~al.}}{{}}}
+\bibcite{weirauch_evaluation_2013}{{92}{2013}{{Weirauch et~al.}}{{}}}
+\bibcite{wu_biogps:_2016}{{93}{2016}{{Wu et~al.}}{{}}}
+\bibcite{zaret_pioneer_2011}{{94}{2011}{{Zaret and Carroll}}{{}}}
+\bibcite{zhang_canonical_2014}{{95}{2014}{{Zhang et~al.}}{{}}}
+\bibcite{zhao_tred:_2005}{{96}{2005}{{Zhao et~al.}}{{}}}
+\bibcite{zhao_inferring_2009}{{97}{2009}{{Zhao et~al.}}{{}}}
+\@writefile{toc}{\contentsline {chapter}{Bibliography}{113}{appendix*.63}}
 \@setckpt{tail/biblio}{
-\setcounter{page}{112}
+\setcounter{page}{114}
 \setcounter{equation}{0}
 \setcounter{enumi}{13}
 \setcounter{enumii}{0}
 \setcounter{enumiii}{0}
 \setcounter{enumiv}{0}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{0}
 \setcounter{chapter}{1}
 \setcounter{section}{1}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{paragraph}{0}
 \setcounter{subparagraph}{0}
 \setcounter{figure}{19}
 \setcounter{table}{0}
-\setcounter{NAT@ctr}{98}
+\setcounter{NAT@ctr}{97}
 \setcounter{FBcaption@count}{0}
 \setcounter{ContinuedFloat}{0}
 \setcounter{KVtest}{0}
 \setcounter{subfigure}{0}
 \setcounter{subfigure@save}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
 \setcounter{subtable@save}{0}
 \setcounter{lotdepth}{1}
 \setcounter{lips@count}{2}
 \setcounter{lstnumber}{1}
 \setcounter{Item}{13}
 \setcounter{Hfootnote}{0}
 \setcounter{bookmark@seq@number}{0}
 \setcounter{AM@survey}{0}
 \setcounter{ttlp@side}{0}
 \setcounter{myparts}{0}
 \setcounter{parentequation}{0}
 \setcounter{AlgoLine}{39}
 \setcounter{algocfline}{3}
 \setcounter{algocfproc}{3}
 \setcounter{algocf}{3}
 \setcounter{float@type}{8}
 \setcounter{nlinenum}{0}
 \setcounter{lstlisting}{0}
 \setcounter{section@level}{0}
 }
diff --git a/tail/cv.aux b/tail/cv.aux
index 53753dc..21059c7 100644
--- a/tail/cv.aux
+++ b/tail/cv.aux
@@ -1,49 +1,49 @@
 \relax 
 \providecommand\hyper@newdestlabel[2]{}
-\@writefile{toc}{\contentsline {chapter}{Curriculum Vitae}{113}{section*.64}}
+\@writefile{toc}{\contentsline {chapter}{Curriculum Vitae}{115}{section*.64}}
 \@setckpt{tail/cv}{
-\setcounter{page}{115}
+\setcounter{page}{117}
 \setcounter{equation}{0}
 \setcounter{enumi}{13}
 \setcounter{enumii}{0}
 \setcounter{enumiii}{0}
 \setcounter{enumiv}{0}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{0}
 \setcounter{chapter}{1}
 \setcounter{section}{1}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{paragraph}{0}
 \setcounter{subparagraph}{0}
 \setcounter{figure}{19}
 \setcounter{table}{0}
-\setcounter{NAT@ctr}{98}
+\setcounter{NAT@ctr}{97}
 \setcounter{FBcaption@count}{0}
 \setcounter{ContinuedFloat}{0}
 \setcounter{KVtest}{0}
 \setcounter{subfigure}{0}
 \setcounter{subfigure@save}{0}
 \setcounter{lofdepth}{1}
 \setcounter{subtable}{0}
 \setcounter{subtable@save}{0}
 \setcounter{lotdepth}{1}
 \setcounter{lips@count}{2}
 \setcounter{lstnumber}{1}
 \setcounter{Item}{13}
 \setcounter{Hfootnote}{0}
 \setcounter{bookmark@seq@number}{0}
 \setcounter{AM@survey}{0}
 \setcounter{ttlp@side}{0}
 \setcounter{myparts}{0}
 \setcounter{parentequation}{0}
 \setcounter{AlgoLine}{39}
 \setcounter{algocfline}{3}
 \setcounter{algocfproc}{3}
 \setcounter{algocf}{3}
 \setcounter{float@type}{8}
 \setcounter{nlinenum}{0}
 \setcounter{lstlisting}{0}
 \setcounter{section@level}{0}
 }