diff --git a/main/ch_atac-seq.aux b/main/ch_atac-seq.aux index 5ca24ea..5ab3784 100644 --- a/main/ch_atac-seq.aux +++ b/main/ch_atac-seq.aux @@ -1,174 +1,168 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{vierstra_genomic_2016} \citation{neph_expansive_2012} \citation{adey_rapid_2010,buenrostro_transposition_2013} \citation{barski_high-resolution_2007} \citation{vierstra_genomic_2016} \citation{vierstra_genomic_2016} \citation{adey_rapid_2010,buenrostro_transposition_2013} \citation{adey_rapid_2010} -\citation{adey_rapid_2010} \@writefile{toc}{\contentsline {chapter}{\numberline {4}Chromatin accessibility of monocytes}{57}{chapter.4}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{loa}{\addvspace {10\p@ }} \newlabel{atac_seq}{{4}{57}{Chromatin accessibility of monocytes}{chapter.4}{}} \@writefile{chapter}{\contentsline {toc}{Chromatin accessibility of monocytes}{57}{chapter.4}} \@writefile{toc}{\contentsline {section}{\numberline {4.1}ATAC-seq}{57}{section.4.1}} \@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces \textbf {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows to add sequencing adaptors. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legent taken and adapted from \citep {vierstra_genomic_2016}.\relax }}{58}{figure.caption.31}} \newlabel{atac_seq_atac_seq}{{4.1}{58}{\textbf {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows to add sequencing adaptors. 
The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legent taken and adapted from \citep {vierstra_genomic_2016}.\relax }{figure.caption.31}{}} +\citation{adey_rapid_2010} \citation{neph_expansive_2012} \citation{berest_quantification_2018} \citation{grossman_positional_2018} \@writefile{toc}{\contentsline {section}{\numberline {4.2}Monitoring TF binding}{59}{section.4.2}} \citation{angerer_single_2017} -\citation{fan_characterizing_2016,kiselev_sc3:_2017} -\citation{aibar_scenic:_2017} -\citation{gonzalez-blas_cistopic:_2019} -\citation{buenrostro_transposition_2013} \@writefile{toc}{\contentsline {section}{\numberline {4.3}The advent of single cell DGF}{60}{section.4.3}} -\@writefile{toc}{\contentsline {section}{\numberline {4.4}A quick overview of scATAC-seq data analysis}{60}{section.4.4}} -\@writefile{toc}{\contentsline {section}{\numberline {4.5}Open questions}{60}{section.4.5}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces \textbf {framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used a if it was a bulk sequencing experiment. Regions of interest are listed using peak calling on the the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. 
The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.\relax }}{61}{figure.caption.32}} -\newlabel{atac_seq_pipeline}{{4.2}{61}{\textbf {framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used a if it was a bulk sequencing experiment. Regions of interest are listed using peak calling on the the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.\relax }{figure.caption.32}{}} -\citation{hepler_10x_2018} +\@writefile{toc}{\contentsline {section}{\numberline {4.4}Open issues}{60}{section.4.4}} +\@writefile{toc}{\contentsline {section}{\numberline {4.5}Data}{60}{section.4.5}} \citation{hon_chromasig:_2008} \citation{nielsen_catchprofiles:_2012} \citation{kundaje_ubiquitous_2012} \citation{nair_probabilistic_2014} \citation{groux_spar-k:_2019} -\@writefile{toc}{\contentsline {section}{\numberline {4.6}Data}{62}{section.4.6}} -\@writefile{toc}{\contentsline {section}{\numberline {4.7}Identification of catalog of chromatin architectures}{62}{section.4.7}} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{63}{subsection.4.7.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces \textbf {Illustration of the expectation-maximization algorithms} \textbf {A} 
illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure. EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }}{63}{figure.caption.33}} -\newlabel{atac_seq_em}{{4.3}{63}{\textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. 
Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.\\ EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }{figure.caption.33}{}} +\@writefile{toc}{\contentsline {section}{\numberline {4.6}Identifying over-represented signals}{61}{section.4.6}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.6.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{61}{subsection.4.6.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.6.2}EMSequence : an algorithm to identify over-represented sequences}{61}{subsection.4.6.2}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces \textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure. EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }}{62}{figure.caption.32}} +\newlabel{atac_seq_em}{{4.2}{62}{\textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. 
Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.\\ EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }{figure.caption.32}{}} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.2}EMSequence : an algorithm to identify over-represented sequences}{64}{subsection.4.7.2}} -\@writefile{toc}{\contentsline {subsubsection}{without shift and flip}{64}{subsection.4.7.2}} -\newlabel{atac_seq_emseq_likelihood}{{4.1}{64}{without shift and flip}{equation.4.7.1}{}} \citation{nair_probabilistic_2014} +\@writefile{toc}{\contentsline {subsubsection}{without shift and flip}{63}{figure.caption.32}} +\newlabel{atac_seq_emseq_likelihood}{{4.1}{63}{without shift and flip}{equation.4.6.1}{}} +\newlabel{atac_seq_emseq_update_model}{{4.2}{63}{without shift and flip}{equation.4.6.2}{}} +\@writefile{toc}{\contentsline {subsubsection}{with shift and flip}{63}{equation.4.6.2}} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} -\newlabel{atac_seq_emseq_update_model}{{4.2}{65}{without shift and flip}{equation.4.7.2}{}} -\@writefile{toc}{\contentsline {subsubsection}{with shift and flip}{65}{equation.4.7.2}} -\newlabel{atac_seq_emseq_likelihood_shift_flip}{{4.3}{65}{with shift and flip}{equation.4.7.3}{}} -\newlabel{atac_seq_emseq_reverse_motif}{{4.4}{65}{with shift and flip}{equation.4.7.4}{}} -\newlabel{atac_seq_emseq_update_model_shift_flip}{{4.5}{66}{with shift and flip}{equation.4.7.5}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{66}{subsection.4.7.3}} +\newlabel{atac_seq_emseq_likelihood_shift_flip}{{4.3}{64}{with shift and flip}{equation.4.6.3}{}} 
+\newlabel{atac_seq_emseq_reverse_motif}{{4.4}{64}{with shift and flip}{equation.4.6.4}{}} +\newlabel{atac_seq_emseq_update_model_shift_flip}{{4.5}{64}{with shift and flip}{equation.4.6.5}{}} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} -\newlabel{atac_seq_emjoint_likelihood}{{4.6}{67}{EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{equation.4.7.6}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.4}Data realignment}{67}{subsection.4.7.4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.6.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{65}{subsection.4.6.3}} +\newlabel{atac_seq_emjoint_likelihood}{{4.6}{65}{EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{equation.4.6.6}{}} \citation{voss_dynamic_2014} \citation{cirillo_opening_2002,zaret_pioneer_2011,soufi_pioneer_2015} \citation{buenrostro_transposition_2013} -\@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces \textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. 
With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }}{68}{figure.caption.34}} -\newlabel{atac_seq_fragment_size}{{4.4}{68}{\textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. 
This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }{figure.caption.34}{}} -\@writefile{toc}{\contentsline {section}{\numberline {4.8}Results}{68}{section.4.8}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.8.1}Fragment size analysis}{68}{subsection.4.8.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces \textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively. The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }}{69}{figure.caption.35}} -\newlabel{atac_seq_ctcf_all_data}{{4.5}{69}{\textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. 
\textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }{figure.caption.35}{}} \citation{buenrostro_transposition_2013} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.6.4}Data realignment}{66}{subsection.4.6.4}} +\@writefile{toc}{\contentsline {section}{\numberline {4.7}Results}{66}{section.4.7}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.1}Fragment size analysis}{66}{subsection.4.7.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces \textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. 
Each fragments which size overlapped the size range spanned by a class, was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }}{67}{figure.caption.33}} +\newlabel{atac_seq_fragment_size}{{4.3}{67}{\textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. 
This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }{figure.caption.33}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces \textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively. The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }}{68}{figure.caption.34}} +\newlabel{atac_seq_ctcf_all_data}{{4.4}{68}{\textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. 
The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }{figure.caption.34}{}} \citation{adey_rapid_2010} \citation{buenrostro_transposition_2013,li_identification_2019} -\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces \textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }}{70}{figure.caption.36}} -\newlabel{atac_seq_ctcf_sp1_myc_ebf1_footprint}{{4.6}{70}{\textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }{figure.caption.36}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.8.2}Measuring open chromatin and nucleosome occupancy}{70}{subsection.4.8.2}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces \textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. 
For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }}{69}{figure.caption.35}} +\newlabel{atac_seq_ctcf_sp1_myc_ebf1_footprint}{{4.5}{69}{\textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }{figure.caption.35}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.2}Measuring open chromatin and nucleosome occupancy}{69}{subsection.4.7.2}} \citation{neph_expansive_2012} \citation{fu_insulator_2008} \citation{neph_expansive_2012} \citation{kundaje_ubiquitous_2012} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.8.3}Evaluation of EMSequence and ChIPPartitioning}{72}{subsection.4.8.3}} -\@writefile{toc}{\contentsline {subsubsection}{EMSequence}{72}{subsection.4.8.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.3}Evaluation of EMSequence and ChIPPartitioning}{71}{subsection.4.7.3}} +\@writefile{toc}{\contentsline {subsubsection}{EMSequence}{71}{subsection.4.7.3}} \citation{kent_blatblast-like_2002} \citation{chatr-aryamontri_biogrid_2017} \citation{castro-mondragon_rsat_2017} -\@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces \textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. 
The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }}{73}{figure.caption.37}} -\newlabel{atac_seq_emseq_auc_roc}{{4.7}{73}{\textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }{figure.caption.37}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }}{74}{figure.caption.38}} -\newlabel{atac_seq_emseq_sp1_10class}{{4.8}{74}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. 
Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }{figure.caption.38}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces \textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }}{72}{figure.caption.36}} +\newlabel{atac_seq_emseq_auc_roc}{{4.6}{72}{\textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }{figure.caption.36}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. 
Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }}{73}{figure.caption.37}} +\newlabel{atac_seq_emseq_sp1_10class}{{4.7}{73}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }{figure.caption.37}{}} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {subsubsection}{ChIPPartitioning}{75}{figure.caption.38}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{76}{figure.caption.39}} -\newlabel{atac_seq_emread_ctcf_noshift_flip}{{4.9}{76}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. 
For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.39}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.10}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{76}{figure.caption.40}} -\newlabel{atac_seq_emread_ctcf_shift_flip}{{4.10}{76}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.40}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. 
For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{74}{figure.caption.38}} +\newlabel{atac_seq_emread_ctcf_noshift_flip}{{4.8}{74}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.38}{}} +\@writefile{toc}{\contentsline {subsubsection}{ChIPPartitioning}{74}{figure.caption.37}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{75}{figure.caption.39}} +\newlabel{atac_seq_emread_ctcf_shift_flip}{{4.9}{75}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. 
The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.39}{}} +\@writefile{toc}{\contentsline {section}{\numberline {4.8}Aligning the binding sites}{76}{section.4.8}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.10}{\ignorespaces \textbf {Central parts of the extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{77}{figure.caption.40}} +\newlabel{atac_seq_23class}{{4.10}{77}{\textbf {Central parts of the extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.40}{}} \citation{kurotaki_transcriptional_2017,rico_comparative_2017} -\@writefile{toc}{\contentsline {section}{\numberline {4.9}Aligning the binding sites}{77}{section.4.9}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.11}{\ignorespaces \textbf {Central parts of the extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. 
A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{78}{figure.caption.41}} -\newlabel{atac_seq_23class}{{4.11}{78}{\textbf {Central parts of the extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.41}{}} \citation{castro-mondragon_rsat_2017} -\@writefile{toc}{\contentsline {section}{\numberline {4.10}Exploring individual TF classes}{79}{section.4.10}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.12}{\ignorespaces \textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{80}{figure.caption.42}} -\newlabel{atac_seq_ctcf_subclass}{{4.12}{80}{\textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. 
A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.42}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.11}{\ignorespaces \textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{79}{figure.caption.41}} +\newlabel{atac_seq_ctcf_subclass}{{4.11}{79}{\textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. 
A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.41}{}} +\@writefile{toc}{\contentsline {section}{\numberline {4.9}Exploring individual TF classes}{79}{section.4.9}} \citation{marsland_machine_2015-1} \citation{fan_characterizing_2016,kiselev_sc3:_2017} \citation{aibar_scenic:_2017} \citation{gonzalez-blas_cistopic:_2019} -\@writefile{toc}{\contentsline {section}{\numberline {4.11}Discussions}{81}{section.4.11}} -\@writefile{toc}{\contentsline {section}{\numberline {4.12}Perspectives}{81}{section.4.12}} -\@writefile{toc}{\contentsline {section}{\numberline {4.13}Methods}{82}{section.4.13}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.1}Implementations}{82}{subsection.4.13.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.2}Fragment classes}{82}{subsection.4.13.2}} -\newlabel{atac_seq_fragment_length_class}{{4.7}{82}{Fragment classes}{equation.4.13.7}{}} +\@writefile{toc}{\contentsline {section}{\numberline {4.10}Discussions}{80}{section.4.10}} +\@writefile{toc}{\contentsline {section}{\numberline {4.11}Perspectives}{80}{section.4.11}} +\citation{castro-mondragon_rsat_2017} +\@writefile{toc}{\contentsline {section}{\numberline {4.12}Methods}{81}{section.4.12}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.1}Implementations}{81}{subsection.4.12.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.2}Fragment classes}{81}{subsection.4.12.2}} +\newlabel{atac_seq_fragment_length_class}{{4.7}{81}{Fragment classes}{equation.4.12.7}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.3}Simulated sequences}{81}{subsection.4.12.3}} \citation{dalton_clustering_2009} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.3}Simulated sequences}{83}{subsection.4.13.3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.4}Realignment using JASPAR motifs}{83}{subsection.4.13.4}} 
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.5}Display of motif logo}{83}{subsection.4.13.5}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.6}Model extension}{83}{subsection.4.13.6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.13.7}Extracting data assigned to a class}{83}{subsection.4.13.7}} -\newlabel{encode_peaks_algo_ndr_extend}{{3}{84}{Extracting data assigned to a class}{algocfline.3}{}} -\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Computes a matrix containing the data assigned to a given class $S$.\relax }}{84}{algocf.3}} -\newlabel{atac_seq_algo_extract_class}{{3}{84}{Extracting data assigned to a class}{algocf.3}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.4}Realignment using JASPAR motifs}{82}{subsection.4.12.4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.5}Model extension}{82}{subsection.4.12.5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.12.6}Extracting data assigned to a class}{82}{subsection.4.12.6}} +\@writefile{lot}{\contentsline {table}{\numberline {4.1}{\ignorespaces \textbf {TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TF which models are children of the given node. "Name" refers to the label this model is referred to in the text and figures.\relax }}{83}{table.caption.42}} +\newlabel{atac_seq_motif_table}{{4.1}{83}{\textbf {TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TF which models are children of the given node. 
"Name" refers to the label this model is referred to in the text and figures.\relax }{table.caption.42}{}} +\newlabel{encode_peaks_algo_ndr_extend}{{3}{85}{Extracting data assigned to a class}{algocfline.3}{}} +\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Computes a matrix containing the data assigned to a given class $S$.\relax }}{85}{algocf.3}} +\newlabel{atac_seq_algo_extract_class}{{3}{85}{Extracting data assigned to a class}{algocf.3}{}} \@setckpt{main/ch_atac-seq}{ -\setcounter{page}{85} +\setcounter{page}{86} \setcounter{equation}{7} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{4} -\setcounter{section}{13} -\setcounter{subsection}{7} +\setcounter{section}{12} +\setcounter{subsection}{6} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} -\setcounter{figure}{12} -\setcounter{table}{0} +\setcounter{figure}{11} +\setcounter{table}{1} \setcounter{NAT@ctr}{0} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{39} \setcounter{algocfline}{3} \setcounter{algocfproc}{3} \setcounter{algocf}{3} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/main/ch_atac-seq.tex b/main/ch_atac-seq.tex index d38fe50..a980632 100644 --- a/main/ch_atac-seq.tex +++ b/main/ch_atac-seq.tex @@ -1,543 +1,589 @@ \cleardoublepage \chapter{Chromatin accessibility of 
monocytes} \label{atac_seq} \markboth{Chromatin accessibility of monocytes}{Chromatin accessibility of monocytes} \addcontentsline{chapter}{toc}{Chromatin accessibility of monocytes} +The chapter contains ongoing work. I present the foundations of a computational framework to analyse chromatin organization around TF binding sites from ATAC-seq data. As a matter of fact, the results presented here are quite preliminary. However, in the best case, this may shape a basis for other projects. % Because reporting these results, even if incomplete, is at least useless and at most useful to the scientific community, there is no reason not to present them. + Digital genomic footprinting (DGF) methods are a powerful means to reveal protein occupancy, genome-wide, at once \citep{vierstra_genomic_2016}. These methods allow to identify open chromatin regions within a genome and thus to list its active regulatory sites. These technologies are based on a targeted degradation of the open regions of the genome, either by DNaseI \citep{neph_expansive_2012} or by a transposon-based system \citep{adey_rapid_2010,buenrostro_transposition_2013}. DGF technologies enjoy an ever-growing popularity because of the wealth of data produced in a single experiment. Indeed, instead of running thousands - one per transcription factor (TF) - of chromatin immunoprecipitation followed by sequencing (ChIP-seq) \citep{barski_high-resolution_2007} to know where each TF is binding, it is sufficient to run a single chromatin accessibility assay. The price to pay for this gain of simplicity, compared to ChIP-seq, is a loss of information.
Indeed, chromatin accessibility assays allow to list any active regulatory region within a sample, at once but do not give any information about which TF or complex is bound at a given location. The transposon-based method - named assay for transposase-accessible chromatin with high-throughput sequencing (ATAC-seq) - is currently gaining a rapid popularity due to its technical affordability and cheap labor costs compared to DNaseI-based methods. \section{ATAC-seq} \begin{figure}[!htbp] \begin{center} \includegraphics[scale=0.7]{images/ch_atac-seq/ATAC-seq2.png} \captionof{figure}{\textbf{ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows to add sequencing adaptors. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legend taken and adapted from \citep{vierstra_genomic_2016}.} \label{atac_seq_atac_seq} \end{center} \end{figure} ATAC-seq assays use a modified Tn5 transposase enzyme to selectively fragment the accessible regions of the genome \citep{adey_rapid_2010,buenrostro_transposition_2013}. The enzyme inserts small double stranded barcodes inside the DNA wherever it is accessible resulting in the creation of double strand breaks (shown in Figure \ref{atac_seq_atac_seq}). This process, known as tagmentation, allows to i) fragment the genome and ii) insert sequencing barcodes at once. It should be noted that the Tn5 acts as a homodimer and thus inserts two copies of the same adaptor separated from each other by 9bp \citep{adey_rapid_2010}. For a given genomic locus, the number of insertions depends on several parameters. First, the Tn5 transposition rate itself depends on the enzyme and substrate concentrations.
Second, it has been demonstrated that the Tn5 possesses a slight sequence preference \citep{adey_rapid_2010}. Finally, the most important parameter is steric hindrance. Stretches of DNA occupied by other proteins, such as TF binding sites, are protected from transposition. This usually leads to the creation of a characteristic signal at the level of the binding site. The experimental readout of ATAC-seq is produced by sequencing the DNA fragment edges – the transposition sites. Mapping these sequences against the genome allows to retrieve the insertion sites. If we represent the density of transposition events along a given region of the genome, the density usually shows a decrease at the level of the binding site. This decrease is named “footprint” (see Figure \ref{atac_seq_atac_seq}). Finally, if paired-end sequencing is performed, it is possible to know the size of each fragment, which, as we will see later, is of biological importance. \section{Monitoring TF binding} As discussed above, DGF assays are able to highlight active regulatory elements from an entire genome, at once. However, this comes with the price of an information loss. First, even if we can identify active loci likely to be bound by TFs, we have no direct idea about the identities of the TFs bound. Second, we have no idea about the function of those regions. These regions may act as transcriptional activator or repressor. This activity is ultimately borne by the TF and other complexes bound. Thus delineating a region function necessitates identifying the TFs bound there. This task, even if difficult, can be undertaken by implementing dedicated strategies. First, it is possible to collect evidence about the identity of TF likely to bind at a given location through a motif analysis. TFs can bind DNA directly through their own DNA binding domain or indirectly, through an interaction with at least one other partner TF which binds DNA directly \citep{neph_expansive_2012}.
For a given TF, direct binding events can be detected by monitoring the presence of a binding motif if a specificity model is available. Thus a footprint bearing a motif is likely to reflect a direct binding event. However, this method has two important limitations : related TFs often share a common DNA specificity and, for indirect binding, nothing can be done to detect such events. Also, evidence about the presence of larger complexes can be collected by studying the size of the footprint. Large complexes should leave large footprints. This approach, even if limited, is able to pinpoint a handful of candidate TFs. Second, deciphering the functions of the regulatory elements can be undertaken by looking at the footprint produced by a given factor. Indeed, previous studies have shown that activator and repressor TFs tend to produce different types of footprints \citep{berest_quantification_2018}. Also, the spatial positioning of the TF motif within the footprint seems to be linked with the factor functions \citep{grossman_positional_2018}. For instance, factors associated with the regulation of transcription tend to have a motif in the middle of the footprint whereas factors known to interact with chromatin remodeling factors tend to have a motif at the edge of the footprint, in contact with the surrounding nucleosomes. \section{The advent of single cell DGF} Recently, the advent of single-cell (sc) sequencing technologies has been a real game changer in the field of life science. These technological advances allowed to measure gene expression and chromatin accessibility (scATAC-seq) at a yet unprecedented resolution. Whereas bulk sequencing provided an average overview of what was going on, single-cell sequencing allows to monitor what is happening in each cell of a population. This advance had a profound impact on genomics for two reasons.
First, for the very first time, the heterogeneity of a cell population became accessible and could be studied at the chromatin, transcriptional and protein levels. Second, the possibility of collecting high dimensionality data from tens of thousands of individuals allows genomics to fully enter the modern big data era, making commonly used machine learning methods usable as the number of parameters to estimate in the models became smaller than the number of individuals in the data \citep{angerer_single_2017}. -\section{A quick overview of scATAC-seq data analysis} +% \section{A quick overview of scATAC-seq data analysis} -So far, most of the single cell technologies are targeted at measuring gene expression through scRNA-seq. Naturally, dedicated algorithms and computational methods have been developed to analyze these data. +% So far, most of the single cell technologies are targeted at measuring gene expression through scRNA-seq. Naturally, dedicated algorithms and computational methods have been developed to analyze these data.
Currently, the most common types of analyses made are i) data projections and dimensionality reduction such as principal component analysis (PCA), t-stochastic distributed neighbours embedding (t-SNE) or uniform manifold approximation and projection (UMAP) and ii) cell population detection by clustering the cells based on the expression of genes \citep{fan_characterizing_2016, kiselev_sc3:_2017}, by reconstructing gene regulation network \citep{aibar_scenic:_2017} or by identifying cellular states based on the accessible region motif content \citep{gonzalez-blas_cistopic:_2019}. In all cases, the use of scATAC-seq data is to determined whether a region is accessible or not. The downstream analyses characterizes the accessible region using i) the number of reads mapping in these regions as a measure of the accessibility or ii) the sequence content within these accessible regions to determine regulatory topics. -\section{Open questions} +% \section{Open questions} -\begin{figure}[!htbp] -\begin{center} - \includegraphics[scale=0.5]{images/ch_atac-seq/pipeline.png} - \captionof{figure}{\textbf{framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used a if it was a bulk sequencing experiment. Regions of interest are listed using peak calling on the the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. 
The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.} -\label{atac_seq_pipeline} -\end{center} -\end{figure} +% \begin{figure}[!htbp] +% \begin{center} +% \includegraphics[scale=0.5]{images/ch_atac-seq/pipeline.png} +% \captionof{figure}{\textbf{framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used a if it was a bulk sequencing experiment. Regions of interest are listed using peak calling on the the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.} +% \label{atac_seq_pipeline} +% \end{center} +% \end{figure} + +% All these methods have shown good performances to identify know and new cell populations [REFERENCES]. However, some issues remains open. First, none of these methods uses DGF data to identify different types of footprints or chromatin architecture, in terms of signal shape, at the single cell level. Second, ATAC-seq measures chromatin accessibility but also provides information about the nucleosome occupancy at accessible genomic regions \citep{buenrostro_transposition_2013}. Thus counting the number of reads mapping at a given loci is, indeed, an indication of accessibility but it does use only a small fraction of the available information.
Finally, to date, no study has tried to determine whether what is observed at the bulk level can also be seen at the individual cell level and whether this can be used to infer the molecular state of the cells. + +% In this project, I designed and developed the basements of a computational framework to construct a catalog of prototypical chromatin architectures from single-cell data that can later on be used to annotate individual regions, in single cell. Such a method can be useful to determine cellular molecular state and to group cells accordingly. The entire pipeline is illustrate in Figure \ref{atac_seq_pipeline}. -All these methods have shown good performances to identify know and new cell populations [REFERENCES]. However, some issues remains open. First, none of these methods uses DGF data to identify different types of footprints or chromatin architecture, in terms of signal shape, at the single cell level. Second, ATAC-seq measures chromatin accessibility but also provides information about the nucleosome occupancy at accessible genomic regions \citep{buenrostro_transposition_2013}. Thus counting the number of reads mapping at a given loci is, indeed, an indication of accessibility but it does use only a small fraction of the available information. Finally, to date, no study has tried to determine whether what is observed at the bulk level can also be seen at the individual cell level and whether this can be used to infer the molecular state of the cells. +\section{Open issues} -In this project, I designed and developed the basements of a computational framework to construct a catalog of prototypical chromatin architectures from single-cell data that can later on be used to annotate individual regions, in single cell. Such a method can be useful to determine cellular molecular state and to group cells accordingly. The entire pipeline is illustrate in Figure \ref{atac_seq_pipeline}. +I identified two interesting questions with regard to ATAC-seq data.
First, in the previous chapters, I studied how chromatin is organized in the vicinity of TF binding sites using a pretty standard combination of ChIP-seq, DNase-seq and MNase-seq data. However, I wanted to assess to what extent the same could be done with fewer and cheaper-to-produce data. Second, I wondered to what extent single-cell data could be pooled together and used as a bulk sequencing experiment. \section{Data} To this end, I chose to work with a publicly available single-cell ATAC-seq dataset from 5'000 human blood monocytes from a healthy donor. These data have been produced by 10xGenomics (\url{https://www.10xgenomics.com}). -10xGenomics is one of the most promising and fast growing company specialized in sequencing technologies in the San Francisco Bay area \citep{hepler_10x_2018}. The core activity is to sell sequencing technologies and data analysis softwares to public and private entities. To advertise their products, 10xGenomics offer a free access to several high quality single cell datasets. +% 10xGenomics is one of the most promising and fast growing company specialized in sequencing technologies in the San Francisco Bay area \citep{hepler_10x_2018}. The core activity is to sell sequencing technologies and data analysis softwares to public and private entities. To advertise their products, 10xGenomics offer a free access to several high quality single cell datasets. -To demonstrate the capabilities of their sequencing and bioinformatics analysis technologies, pre-processing such as mapping, cell demultiplexing, sequencing adapters trimming, quality control checks have already been performed. Thus working with these data require minimum handling. Additionally, some downstream analyses such as peak calling or clustering have already been performed. For these reasons, this dataset offers all the conditions to be used as a standard to develop and benchmark new analyses methods.
+10xGenomics is a company active in the field of sequencing technologies and data analysis software. To demonstrate the capabilities of their sequencing and bioinformatics analysis technologies, 10xGenomics offers free access to several high quality single cell datasets together with their analysis results. Thus pre-processing steps such as mapping, cell demultiplexing, sequencing adapters trimming, quality control checks have already been performed. Thus working with these data requires minimal handling. Additionally, some downstream analyses such as peak calling or clustering have already been performed. For these reasons, their datasets offer all the conditions to be used as a standard to develop and benchmark new analysis methods. Hg19 mapped reads were downloaded in bam format from \url{http://s3-us-west-2.amazonaws.com/10x.files/samples/cell-atac/1.1.0/atac_v1_pbmc_5k/atac_v1_pbmc_5k_possorted_bam.bam} and the corresponding peaks called on the aggregated data were downloaded in bed format from \url{http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_5k/atac_v1_pbmc_5k_peaks.bed}. -\section{Identification of catalog of chromatin architectures} +\section{Identifying over-represented signals} The study of signal shape (distribution) has been a quite active field for bulk sequencing experiments during the last decade. Dedicated algorithms \citep{hon_chromasig:_2008} \citep{nielsen_catchprofiles:_2012} \citep{kundaje_ubiquitous_2012} \citep{nair_probabilistic_2014} \citep{groux_spar-k:_2019} have been developed to cluster genomic regions based on their distribution of reads, which reflects their function. The major issues faced were i) to assess whether two regions had the same signal, they had to be properly aligned, ii) even if the regions were properly aligned, they had to be properly oriented and iii) the signal may be sparse due to a different sequencing depth.
\subsection{ChIPPartitioning : an algorithm to identify over-represented read patterns} +ChIPPartitioning is an algorithm that has been developed by \cite{nair_probabilistic_2014} to classify regions based on their sequencing profiles and to identify archetypical sequencing densities (or models). Because the algorithm is already presented in detail in section \ref{encode_peaks_chippartitioning}, it will not be discussed further here. Nonetheless, the reader is invited to read the above mentioned section in order to properly understand the points discussed below. + +% Most of the above mentioned algorithms and softwares deal with some of these issues. However, ChIPPartitioning \citep{nair_probabilistic_2014} (see section \ref{encode_peaks_chippartitioning}) is really interesting. It is a probabilistic partitioning method that softly clusters a sets of genomic regions represented as a vector of counts corresponding to the number of reads (ChIP-seq, DNase-seq) along them. The regions clustered based on their signal shape resemblance. To ensure proper comparisons between the regions, the algorithm allows to offset one region compare to the other to retrieve a similar signal at different offsets and to flip the signal orientation. Finally, it has been demonstrated to be really robust to sparse data. + +% This algorithm models the signal over a region of length $L$ has having being sampled from a mixture of $K$ signal models, using $L$ independent Poisson distributions. The number of reads sequenced over this region is then the result of this sampling process. The entire set of regions is assumed to have been generated from a mixture of $K$ different signal models (classes). Each class is represented by a vector of $L' \le L$ values that represent the expected number of reads at each position for that class. These values are thus the Poisson distribution parameters.
+ +% In order to discover the $K$ different chromatin signatures in the data, the algorithm proceed to a maximum likelihood estimation of the Poisson distribution parameters using an expectation-maximization (EM) framework. Given a set of $K$ models, the likelihoods of each region given each class is computed. A posterior probability of each class given each region can, in turn, be computed. These probabilities can be interpreted as a soft clustering. The parameters of the classes are updated using a weighted aggregation of the signal. Since each region is computed a probability to belong to each class, it participates to the update of all the classes, with different weights. + +% If the length of the chromatin signature searched $L'500bp) unassigned.} \label{atac_seq_fragment_size} \end{center} \end{figure} \begin{figure} \begin{center} \includegraphics[scale=0.4]{images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png} \captionof{figure}{\textbf{Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf{Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf{Middle row :} each position of the reads were used. \textbf{Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. 
The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).} \label{atac_seq_ctcf_all_data} \end{center} \end{figure} If a TF can protect a stretch of DNA against transposition and create a footprint, so can a nucleosome. However, the two cases are biologically drastically different. Nucleosomes compete with TFs to bind on DNA \citep{voss_dynamic_2014}. Thus nucleosome footprints represent regions of the genome that cannot be bound by TFs, with the exception of pioneering factors \citep{cirillo_opening_2002,zaret_pioneer_2011,soufi_pioneer_2015}. Mixing nucleosome and TF footprints could bias downstream analyses. Nucleosome fragments are expected to be large, as a nucleosome is wrapped by $\sim$150bp of DNA whereas nucleosome free region fragments can be expected to be shorter. Long nucleosome free region fragments are unlikely. The longer an accessible region is, the more likely an insertion will happen resulting in the creation of two shorter fragments. A fragment size analysis allowed the identification of different categories of fragments (Figure \ref{atac_seq_fragment_size}). In this figure, open regions, mono- and di-nucleosome fragments are clearly visible. Moreover, a 10bp periodic oscillation reflecting the DNA pitch is also visible. This pattern is expected and indicates a good data quality \citep{buenrostro_transposition_2013}. Rather than assigning arbitrary fragment size thresholds to separate the categories, I preferred to use the approach developed by \cite{buenrostro_transposition_2013}. The fragment sizes were fitted by a mixture of three Gaussian distributions. Then, the limits for each fragment class were defined as the sizes at which the probability of assignment to that fragment class dropped under 0.9 (Figure \ref{atac_seq_fragment_size}B).
This method ensured the classification of 134 million fragments, leaving $\sim$46 million reads unassigned (Figure \ref{atac_seq_fragment_size}C). However, this reduces drastically the risks of fragment mis-classification and protects the downstream analyses from a strong bias. \subsection{Measuring open chromatin and nucleosome occupancy} \begin{figure} \begin{center} \includegraphics[scale=0.3]{images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png} \captionof{figure}{\textbf{Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.} \label{atac_seq_ctcf_sp1_myc_ebf1_footprint} \end{center} \end{figure} Once the different fragment populations have been identified, the next question to solve is how each category of fragment should be represented. First, for open chromatin fragments, it is clear that we want to know where the DNA is accessible. This information is provided by the fragment edges -- the transposition sites. However, to account for the fact that the Tn5 transposase acts as a homo-dimer and inserts two barcodes side by side \citep{adey_rapid_2010}, the fragment edge positions were modified by +4bp for reads mapping to the + strand and -5bp for reads mapping to the - strand, as done in other studies \citep{buenrostro_transposition_2013,li_identification_2019}. Second, for mono and di-nucleosome fragments, we are interested in knowing where the nucleosomes are sitting. For this, the fragment edges may not be the most informative.
A better way to represent those fragments would be to use the center positions, which should correspond to the dyad for mono-nucleosomes, or even to consider the entire reads or fragments. To test these hypotheses I investigated the different signal aggregations around predicted CTCF binding sites. The signal, +/- 1kb around the motif, was aggregated inside bins of 1, 2 or 10bp size. CTCF predicted binding sites were good candidates because CTCF is known to bind mostly through its motif (\cite{neph_expansive_2012} and Figure \ref{encode_peaks_gm12878_motif_prop}). Additionally CTCF binding produces a really typical chromatin architecture with strongly positioned nucleosome arrays \citep{fu_insulator_2008} and leaves a footprint \citep{neph_expansive_2012}. As seen in Figure \ref{atac_seq_ctcf_all_data}, entire open chromatin reads and fragments do not allow a footprint signature to be visualized (upper and middle rows, red lines). Both of them nonetheless highlight open chromatin regions. The footprint becomes visible when considering the edges of the open chromatin fragments (bottom row, red line). Increasing the bin size blurs it and eventually makes it disappear (10bp, lower right). Regarding nucleosomes, considering the entire fragments blurs the signal (upper row, blue and green lines) and the entire reads reveal the region upstream and downstream of the nucleosomes (middle row, blue and green lines). The only way to obtain precise nucleosome occupancy information was to use the middle position of the mono-nucleosome fragments (bottom row, blue line). Interestingly enough, the middle position of di-nucleosome fragments indicates the DNA linker between two adjacent nucleosomes but does not accumulate in open chromatin regions (bottom row, green line). This suggested that di-nucleosome fragments could be separated into two mono-nucleosome fragments. I tested this hypothesis by simply dividing a di-nucleosome fragment into two smaller ones, at its center position.
I then pooled these new fragments with the mono-nucleosome fragments to create a nucleosome fragment dataset. When looking at the middle of these fragments, they could perfectly reveal the nucleosomes directly adjacent to the CTCF motif. Additionally this nucleosome dataset was also able to reveal a second nucleosome in the arrays (bottom row, violet lines). To further support these results, I also measured the chromatin organization (+4/-5 corrected read edges for open chromatin and center of the nucleosome fragments from the nucleosome fragment dataset) around SP1, myc and EBF1 binding motifs. As shown in Figure \ref{atac_seq_ctcf_sp1_myc_ebf1_footprint}, the aggregation of the signal around the CTCF and SP1 motifs shows an enhanced accessibility on the motif as well as a clear footprint. Moreover, the footprint is in a nucleosome free region. The situation was different for myc and EBF1. Neither of the two aggregations showed a nucleosome free region, nor an increased accessibility around the motif. Regarding myc, even though its aggregation presented a signal compatible with a local protection of its motif, this was shallow in comparison to CTCF and SP1. Finally, EBF1 presented a somewhat decreased accessibility around its motif and a strikingly increased accessibility directly at the level of the motif. The CTCF and SP1 motifs support the fact that footprints and nucleosome occupancy can be revealed using this method. Together with myc and EBF1, they clearly show a heterogeneity of chromatin organizations, at least at the aggregation level. There are many possible explanations for these results. One of them is that the aggregation hides the variability of the individual regions and that SP1 and CTCF present a more conserved organization around their motif than myc and EBF1. Another would be that the most visible and obvious footprints reflect a stronger TF activity.
However, one should remain cautious about the interpretation of aggregation patterns as the signals of the individual sites may interfere with each other, creating an artificial aggregation that does not exist at any individual site \citep{kundaje_ubiquitous_2012}. In the light of these results, I decided to use the +4/-5 corrected edges of the open chromatin reads to investigate footprints and the fragment centers of the newly created nucleosome dataset to investigate nucleosome occupancy. If not explicitly stated otherwise, the reader should consider that any signal measurement is performed using this procedure. \subsection{Evaluation of EMSequence and ChIPPartitioning} Before continuing further in the analysis of the data, it was important to assess the performance of these partitioning methods in discovering sequence motifs and footprint classes. % To evaluate the behavior of each algorithm, a simple situation was considered. As in the previous section, a list of predicted CTCF, SP1, EBF1 and myc binding sites were compiled using a PWM genome scan. For each TF, the open chromatin read density, the nucleosome occupancy and the DNA sequences were extracted. % This case can be considered simple for two reasons. First, for a given TF, all the regions have a motif instance. Second, this motif instance is located exactly in the center of the region, in the same orientation. Thus no shifting nor flipping is required in order to reveal footprints. The only necessary thing may be to discover different types of footprints. \subsubsection{EMSequence} \begin{figure} \begin{center} \includegraphics[scale=0.4]{images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png} \captionof{figure}{\textbf{Classification performances on simulated data :} \textbf{Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence.
These assigned labels were then compared to the true labels using the area under the ROC curve (AUC). The red line indicates the AUC value achieved by the true motifs. \textbf{Right} the 50 ROC curves corresponding to each partition. The red line indicates the ROC curve of the true motifs. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.} \label{atac_seq_emseq_auc_roc} \end{center} \end{figure} \begin{figure} \begin{center} \includegraphics[scale=0.35]{images/ch_atac-seq/sp1_motifs_7class.png} \captionof{figure}{\textbf{SP1 motifs :} partition of 15'883 801bp sequences centered on an SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop the motifs indicate tandem arrangements of SP1 motifs.} \label{atac_seq_emseq_sp1_10class} \end{center} \end{figure} In order to measure the ability of EMSequence to retrieve over-represented motifs from a set of sequences, I simulated 2'000 synthetic DNA sequences of 100bp. The sequences were separated into two classes. Each class was defined by an 8bp sequence motif (Figure \ref{suppl_atac_seq_emseq_best_motifs}). Each sequence had exactly one motif occurrence, anywhere in the sequence (with a uniform probability), on either strand (equiprobable). These sequences were partitioned with flipping into 2 classes by EMSequence in order to find 2 motifs of 11bp ($100 - 11 + 1 = 90$bp of shifting). The optimization was run for 200 iterations. To assess the quality of the motifs discovered, I set up a classification framework inspired by PWMEval-ChIP-peak (see section \ref{section_smileseq_pwmeval}). Using equation \ref{smile_seq_pwmeval_score}, each sequence was scored with both models of each partition and the area under the curve (AUC) of the receiver operating characteristic (ROC) was computed for each partition.
The same was done using the true motif models. Because EMSequence is sensitive to its initial state, 50 partitions were performed. As shown in Figure \ref{atac_seq_emseq_auc_roc}, the de-novo discovered models are as good as the actual sequence motifs at segregating the two sequence classes. Additionally, a visual inspection of the discovered motif logos confirmed that most of the discovered motifs actually match the true sequence motifs (Figure \ref{suppl_atac_seq_emseq_best_motifs}). In order to further demonstrate the ability of EMSequence on a more significant biological case, I investigated SP1 sequence specificity. As for ChIPPartitioning, a list of 15'883 predicted SP1 binding sites was compiled using a PWM genome scan. The sequences +/- 400bp around the motif instance centers were extracted. Thus, all regions contained at least one SP1 site. Consequently, retrieving the SP1 binding site is expected. Additionally, as SP1 tends to bind to promoters [REFERENCE], we cannot exclude that other motifs will be discovered. These sequences were then given to EMSequence to search for several different 31bp long motifs ($801 - 31 + 1 = 771$ of shifting freedom). The optimization was run for 20 iterations. The motifs that were retrieved matched the expectations (Figure \ref{atac_seq_emseq_sp1_10class}). All classes retrieved an SP1 motif. Four classes (1, 5, 6 and 7) retrieved a single SP1 motif. Even though they are highly similar, they vary in terms of flanking regions (class 6 versus 7 for instance). Class 3, which contained a surprisingly long motif, representing 24\% of the data, actually captured a LINE element. Indeed, the sequence ``GCAGCGAGGCTGGGGGAGGGGC'' is characteristic of it (determined using BLAT \citep{kent_blatblast-like_2002} on the UCSC Genome Browser). Finally, and more interestingly, classes 2 and 4 could capture two rare (about 1\% of the cases each) tandem repeats of SP1 motifs with two different spacers (1 and 9bp).
Additionally, head-to-head SP1 motif repeats could be detected (Figure \ref{suppl_emseq_sp1_10class}, classes 1 and 4). This suggested that SP1 binds i) as a homo-dimer or ii) as a hetero-dimer with another member of its family, binding a similar motif. Moreover, the tandem and head-to-head motif repeats suggested that different structural arrangements exist. According to BioGrid \citep{chatr-aryamontri_biogrid_2017}, SP1 has been reported to physically interact with SP1 (homo-dimer), SP3 and SP4 (hetero-dimer). According to JASPAR 2018 matrix clustering \citep{castro-mondragon_rsat_2017}, the KLF and EGR families recognize similar motifs. Members of both families are also listed as SP1 interactors in BioGrid (KLF4, KLF6, KLF9, KLF10 and EGR1). The lack of non-SP1 motifs discovered could be explained by at least one reason. The list of SP1 binding sites was compiled using a quite stringent threshold. The consequence is that the motif instances are highly similar to each other. This makes SP1 motifs strongly dominant within the dataset. Since EMSequence optimizes a set of models, it is highly sensitive to its starting state. In this experiment, EMSequence was initialised randomly. Because of the dominance of SP1 motifs within the data, it is likely that the different classes were attracted by them rather than allowed to diverge to detect other motifs. Together, this evidence supports the fact that EMSequence is suited to perform a meaningful partition of DNA sequences and to retrieve biologically important DNA motifs. \subsubsection{ChIPPartitioning} \begin{figure} \begin{center} \includegraphics[scale=0.30]{images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png} \captionof{figure}{\textbf{Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifting but with flipping to identify different classes of footprints around 26'650 CTCF motifs.
The aggregation signals around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.} \label{atac_seq_emread_ctcf_noshift_flip} \end{center} \end{figure} \begin{figure} \begin{center} \includegraphics[scale=0.35]{images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png} \captionof{figure}{\textbf{Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifting and with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signals around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.} \label{atac_seq_emread_ctcf_shift_flip} \end{center} \end{figure} A complete benchmark of ChIPPartitioning has been performed in \cite{nair_probabilistic_2014}. In this paper, the authors generated simulated ChIP-seq data with patterns to retrieve, at different coverages, and compared the performance with that of other similar software. It turned out that ChIPPartitioning was the best performing method. For this reason, I did not repeat this benchmark. However, the ability of ChIPPartitioning to retrieve footprint classes from ATAC-seq data has not been evaluated yet. To evaluate this, a simple situation was considered. As in the previous section, a list of predicted CTCF and SP1 binding sites was compiled using a genome scan with suitable binding models.
For each TF, the open chromatin read density around these sites was measured +/-400bp around the motif instances, at single base pair resolution, and classified. As the motif instances were already aligned in the center of the regions, no shifting was used. However, the region orientations were not corrected based on the strand on which the motif instance appeared. To evaluate the capability of ChIPPartitioning to retrieve classes of footprints, these data were classified i) without shifting and with flipping (Figure \ref{atac_seq_emread_ctcf_noshift_flip} and Figure \ref{suppl_emread_sp1_noshift_flip}) and ii) with shifting and flipping (Figure \ref{atac_seq_emread_ctcf_shift_flip} and Figure \ref{suppl_emread_sp1_shift_flip}). First, in both conditions -- with and without shifting -- different open chromatin signal classes were discovered. Second, in most cases, the chromatin accessibility is anti-correlated with the nucleosome occupancy, which is expected. However this is not always the case, such as in Figure \ref{atac_seq_emread_ctcf_noshift_flip} classes 3 and 6. Such patterns may reflect a complex chromatin architecture, with variably positioned nucleosomes, that the partition cannot realign. But it is also likely to be an artifactual signal caused by the partition itself. Third, allowing the regions to be flipped based on the chromatin accessibility signature (Figure \ref{atac_seq_emread_ctcf_noshift_flip} and Figure \ref{suppl_emread_sp1_noshift_flip}) does not properly resolve the orientation of the underlying CTCF and SP1 motif instances. Indeed, the sequence logos, in the center, are symmetric, indicating a superposition of +strand and -strand motifs. Finally, allowing a moderate shifting freedom (+/- 10bp, Figure \ref{atac_seq_emread_ctcf_shift_flip} and Figure \ref{suppl_emread_sp1_shift_flip}) results in blurred out sequence logos.
This indicates that the chromatin accessibility signal realignment dephased the underlying CTCF and SP1 motif instances. Thus the signal that is observed does not represent classes of footprints. In this case, each region contained a motif instance at its center. Nonetheless, even a limited shifting according to the open chromatin signal resulted in the dephasing of the underlying motif instances. Trying to resolve the motif orientation by allowing flipping according to the open chromatin signal was not more successful. Thus, discovering footprint classes from a highly unaligned set of regions does not seem to be possible. Workaround strategies have to be found. \section{Aligning the binding sites} \begin{figure} \begin{center} \includegraphics[scale=0.30]{images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png} \captionof{figure}{\textbf{Central parts of the extended sequence and chromatin models} found in 70'462 monocyte regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inset.} \label{atac_seq_23class} \end{center} \end{figure} To create a catalog of chromatin architectures around TF binding sites in monocytes, it is necessary to be able to align the regions of interest properly (with respect to the binding sites) or to have methods able to deal with this issue. The list of active regulatory regions was assumed to correspond to regions of high ATAC-seq signal. Consequently, I chose to use the peak list generated by 10xGenomics for this dataset as the list of regulatory regions of interest.
Because ChIPPartitioning did not seem to be able to realign regions containing regulatory elements based on their chromatin accessibility profiles, I decided to use EMSequence to i) find TF binding sites in each region and ii) align the regions based on the occurrence of these TF binding sites. Because EMSequence creates a probabilistic alignment, it can deal with the fact that each region can bear several binding sites from different TFs. Finally, a de-novo motif discovery was, strictly speaking, not needed. Instead, I restricted the analysis to a priori important TFs using a single iteration of EMSequence. A handful of important TFs in monocytes \citep{kurotaki_transcriptional_2017,rico_comparative_2017} were selected : jun, HIF1a, myc, PU.1, CEBPB, IRF2, IRF4, IRF8, LHX3, FOXH1, SOX, MEF2c, ELF5, STAT6, NFE2, AHR, E2F2, E2F3, KLF2, KLF4 and NR4A1. Additional TFs and TF families were selected to widen the spectrum of TF families included in the analysis : CTCF, the EGR family, the GATA family, the NFAT family and the RUNX family. Because TFs within a given family tend to bind the same motif (for instance IRF4 and IRF8 or E2F2 and E2F3), binding models representative of sets of TFs were selected from the JASPAR database motif clustering \citep{castro-mondragon_rsat_2017}. In total, 23 binding models were used to initialize as many classes for EMSequence to discover. EMSequence was run for one iteration to classify 70'462 sequences of 1001bp centered on the ATAC-seq peaks, with flipping and 971 of shifting freedom (thus searching motifs of 30bp). Based on the alignment and the data, the resulting 30bp sequence and ATAC-seq read density models were then extended by 500bp on each side to reveal the organization of regulatory sequences (Figures \ref{suppl_atac_seq_23class} and \ref{atac_seq_23class}). First, from the class aggregations, footprints are clearly visible over the TF binding motifs. This strongly suggests that the region realignment worked properly.
Second, the 23 different classes showed various types of footprints. For instance, CTCF shows its usual strongly positioned nucleosome arrays together with a clear chromatin opening over the motif supporting CTCF binding. The important monocyte TF PU.1 also shows an increased chromatin accessibility at its binding sites. However, the footprint drastically differs from that of CTCF in the sense that a clear and wide signal drop -- larger than the PU.1 motif alone -- is visible. It is also concomitant with an increased nucleosome occupancy. Conversely, LHX3 shows a pattern that rather suggests a modest chromatin opening. Finally, the KLF family binding sites show a strong chromatin accessibility rather than a protection of the bound sequences, which may be compatible with a transcriptional repressive activity \citep{berest_quantification_2018}. Third, the overall class probabilities give an indication of the regulatory element content in terms of motifs. It seems that the CTCF motif is the most common one even though it does not mean that each motif instance is bound or even functional. \section{Exploring individual TF classes} \begin{figure} \begin{center} \includegraphics[scale=0.30]{images/ch_atac-seq/data_classCTCF_8class.png} \captionof{figure}{\textbf{CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inset.} \label{atac_seq_ctcf_subclass} \end{center} \end{figure} The results shown in the previous section are per TF aggregation profiles. Thus a further exploration of each class is required to investigate whether several different footprint classes can be isolated per TF.
To do so, I extracted the data assigned to each class and ran ChIPPartitioning on these data. Because the regions had already been aligned properly to have the TF binding motif in their center, ChIPPartitioning was allowed neither shifting nor flipping. As expected, applying this method to the CTCF, PU.1 and AP1 classes refined the results. For instance, the CTCF class data classification (Figure \ref{atac_seq_ctcf_subclass}) showed sub-classes in which CTCF motif instances were likely not bound (classes 8, 6, 7, 2 and 5) as well as sub-classes in which they were likely bound (classes 4, 3 and 1). In the latter group, several chromatin organizations could be revealed, with approximately 35\% of the motif instances showing the canonical CTCF chromatin organization (class 4). The same is illustrated for the PU.1 and AP1 classes (Figures \ref{suppl_atac_seq_pu1_subclass} and \ref{suppl_atac_seq_ap1_subclass}). In both cases, it was possible to identify bound and unbound motif instance sub-classes. Also, for these two TFs, the nucleosomes are not visible, in line with my previous results showing that only CTCF has nucleosome arrays organized with respect to its binding sites (see Chapter \ref{encode_peaks}). \section{Discussions} Even though preliminary, these results showed that this computational framework can prove useful to analyze the chromatin organization around TF binding sites using ATAC-seq data. First, and not much of a surprise, applying population level analyses to the pool of single cell data gave meaningful results. Second, ChIPPartitioning turned out to be unsuitable for properly phasing unaligned regions based on their chromatin accessibility patterns. Instead, the newly proposed EMSequence algorithm turned out to be usable for this task, in a special setting, and was able to produce a meaningful per TF data realignment. As a reminder, short models were searched (and thus large shifting freedom was set).
This alignment was then used to realign larger regions and reveal footprints. Also, a priori knowledge was fed in, in the form of initial sequence model values taken from TF binding model databases. Third, I presented a method to extract data assigned to a class, from a probabilistic partition, without using any hard assignment shortcut. Running ChIPPartitioning on these data then turned out to reveal different chromatin organizations for each TF, making it possible to distinguish between likely bound and unbound motif instances. \section{Perspectives} In fact, in its current form, this framework is incomplete. For instance, a method to estimate the fit of a given partition and choose the best one is required. This would help to choose the appropriate number of classes to search. This could be implemented using the Akaike information criterion \citep{marsland_machine_2015-1}. So far, most of the single cell technologies are targeted at measuring gene expression through scRNA-seq. Cell sub-population detection by clustering expression matrices \citep{fan_characterizing_2016, kiselev_sc3:_2017}, using gene regulatory network reconstruction \citep{aibar_scenic:_2017} or by identifying cellular states based on the accessible region motif content \citep{gonzalez-blas_cistopic:_2019} is popular. Currently, the use of ATAC-seq data at the single cell level is mostly limited to a binary open/closed classification. One can imagine using the above described framework to draw a catalog of chromatin structures from the pooled data and use it to annotate each cell. More precisely this could be done by going back to each peak in each cell and assigning a qualitative label corresponding to the chromatin model that best matches this region in this cell. Ultimately, this would lead to the creation of a matrix (cells $\times$ regions) that could be used to run clustering methods.
How the similarity should be computed and whether each cell will have a high enough coverage for similarity computations to be meaningful remain open questions. Alternatively, one can imagine replacing single cells by different bulk experiments. In this case, the clustering would not isolate cell sub-populations but experiments (individuals, culture conditions, etc.) that are similar to each other. \section{Methods} \subsection{Implementations} In order to allow an easy handling and a quick treatment of the data, the algorithms and procedures described above have been implemented in C++ and fully multi-threaded. Here is a list of the relevant C++ implementations : \begin{itemize} \item EMRead : implementation of the EMRead algorithm. It takes a read count data matrix, the number of classes and the shifting and flipping parameters as input and returns the posterior probabilities. \item EMSequence : implementation of the EMSequence algorithm. It takes a DNA sequence data matrix, the number of classes and the shifting and flipping parameters as input and returns the posterior probabilities. % \item EMJoint : implementation of the generalized EMJoint algorithm. It takes any number of data matrices as input, the number of classes, the shifting and flipping parameters and returns the posterior probabilities. This program can be given 0 or 1 DNA sequence matrix and any number of read count matrices as input. \item ProbToModel : implementation of the data realignment procedure. It takes a data matrix (DNA sequence or read counts) and some posterior probabilities as input and returns the class models. \end{itemize} \subsection{Fragment classes} The distribution of fragment sizes was modeled as a mixture of three classes, each following a Gaussian distribution.
Each class fragment length distribution was modeled using : \begin{equation} \begin{aligned} f(x) & = a \times \exp\left(\frac{-(x-m)^{2}}{2 \times s^{2}}\right) \end{aligned} \label{atac_seq_fragment_length_class} \end{equation} where $x$ is the fragment length, $m$ the mean fragment length for this class, $s$ the fragment length standard deviation and $a$ an amplitude factor. The mean parameters were initialized to 50, 200 and 300bp. The standard deviation parameters were initialized to 10, 10 and 30bp and the amplitude factors to 1. The parameters were fitted to the data using the nls() function in R implementing the Gauss-Newton algorithm. \subsection{Simulated sequences} TODO % I simulated 2'000 synthetic DNA sequences of 100bp long. The sequences were separated in two classes. Each class was defined by a 8bp sequence motif (Figure \ref{suppl_atac_seq_emseq_best_motifs}). Each sequence had exactly one motif occurrence, anywhere in the sequence (with a uniform probability), on either strand (equiprobable). The motif sequence was sampled using the corresponding class model. Finally, the bases outside the sequence were sampled using a mono-nucleotide model with 0.25 probability for each base. \subsection{Realignment using JASPAR motifs} -TODO +23 binding models were downloaded from the motif clustering of JASPAR \citep{castro-mondragon_rsat_2017}. Briefly, the motif clustering is made of a forest of trees (each tree is a cluster) in which the leaves are the individual TF binding models (motifs). Internal nodes binding models are also available. As a matter of fact, they represent a consensus over multiple individual TF binding models. In order to i) have models representing the binding specificity of the TFs of interest and ii) widen the analysis to other TFs if they were sufficiently related to one of the TFs of interest in terms of specificity, I manually selected binding motifs, in the different motif trees, that would fit these requirements.
-\subsection{Display of motif logo} -TODO +The downloaded models were : + +\begin{table} +\begin{center} + \begin{tabular}{ |l|l|p{90mm}|l| } + \hline + \multicolumn{4}{|c|}{Binding models downloaded} \\ + \hline + Cluster ID & Node ID & TFs covered & Name \\ + \hline + 1 & 74 & ARID3b, LHX3 & LHX3 \\ + 2 & 12 & ESRRG, NR4A1, ESRRB, NR2F2 & NR4A1 \\ + 3 & 23 & FOSL1::JUNB, FOSL1::JUN, FOS::JUND, \newline FOSL2::JUN, FOS::JUNB, JDP2, NFE2, FOSL1, FOS, JUND, FOSL2, JUNB, JUN::JUNB, FOSL1::JUND, FOS::JUN, FOSL2::JUND, FOSB::JUNB, FOSL2::JUNB, BATF::JUN, JUN & AP1 \\ + 3 & 24 & NFE2L2, BACH1::MAFK, MAF::NFE2, BACH2 & NFE2 \\ + 4 & 22 & max::myc, MXI1, myc, mycn & myc \\ + 4 & 30 & ARNT, AHR::ARNT & AHR \\ + 4 & 31 & HIF1A, HES5, HES7 & HIF1A \\ + 5 & 20 & CEBPA, CEBPG, CEBPD, CEBPB, CEBPE & CEBPB \\ + 7 & 13 & SPIC, SPI1 & PU.1 \\ + 7 & 17 & ELF5, ELF3, EHF, ELF1, ELF4 & ELF5 \\ + 19 & 2 & NFAT5, NFATC1, NFATC3 & NFAT \\ + 20 & 4 & MEF2C, MEF2B, MEF2A, MEF2D & MEF2C \\ + 21 & 5 & GATA3, GATA5, GATA4, GATA6, GATA1, GATA2 & GATA \\ + 28 & 13 & EGR2, EGR4, EGR1, EGR3 & EGR \\ + 28 & 14 & KLF4, KLF1, KLF9 & KLF \\ + 31 & 4 & IRF7, IRF9, IRF4, IRF8, IRF5 & IRF4 \\ + 31 & 5 & STAT1::STAT2, IRF2 & IRF2 \\ + 32 & STAT6 & STAT6 & STAT6 \\ + 33 & 1 & SOX3, SOX6 & SOX3 \\ + 38 & 3 & RUNX1, RUNX2, RUNX3 & RUNX \\ + 39 & 1 & E2F3, E2F2 & E2F2 \\ + 48 & CTCF & CTCF & CTCF \\ + 66 & 1 & FOXH1 & FOXH1 \\ + \hline + \end{tabular} + \captionof{table} { \textbf{TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url{http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TFs whose models are children of the given node.
"Name" refers to the label by which this model is referred to in the text and figures.} +\label{atac_seq_motif_table} +\end{center} +\end{table} + +Each binding model was downloaded in JASPAR format and then converted to a per-position probability letter matrix. \subsection{Model extension} TODO \subsection{Extracting data assigned to a class} The difference between hard and soft clustering (such as ChIPPartitioning) methods is that in soft clustering "the output is a membership function, so each pattern can belong to more than one group with varying degrees of membership" \citep{dalton_clustering_2009} while in hard clustering each pattern is assigned to only one group. In hard clustering, isolating all regions assigned to a class $X$, creating a matrix of read density and re-running the clustering method on this matrix is straightforward and would do the trick. In soft clustering, this is also possible but requires accounting for the specificity described above. Let us assume that a first matrix $R$ of dimensions $N \times L$ containing $N$ regions of length $L$ has been partitioned in $K$ classes by ChIPPartitioning, with shifting freedom $S 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequent than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf{B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf{C} Same as in (B) but for TF binding sites that do not have their own motif.
The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref{encode_peaks_methods_data}).} \label{encode_peaks_ctcf_association} \end{center} \end{figure} \begin{table} \begin{center} \begin{tabular}{ |c|c|c|l|l|c|c| } \hline \multicolumn{7}{|c|}{Curated associations} \\ \hline TF$_{A}$ & TF$_{B}$ & Motif ass. & Type & Binder & Reported & Validated \\ \hline CTCF & ATF2 & pos & indep.co-bind & & no & no \\ CTCF & EBF1 & pos & indep.co-bind & & yes & no \\ CTCF & MAZ & pos & indep.co-bind & & yes & no \\ CTCF & NFYb & pos & indep.co-bind & & yes & no \\ CTCF & NFkB & pos & indep.co-bind & & yes & no \\ CTCF & PAX5 & pos & indep.co-bind & & yes & no \\ CTCF & SP1 & pos & indep.co-bind & & yes & no \\ CTCF & BATF & neg & indir.co-bind & BATF & yes & no \\ CTCF & ELF1 & neg & indir.co-bind & ELF1 & yes & no \\ CTCF & IRF4 & neg & indir.co-bind & CTCF & yes & no \\ CTCF & MEF2a & neg & indir.co-bind & both & yes & no \\ CTCF & MEF2c & neg & indir.co-bind & both & yes & no \\ CTCF & NFATc & neg & indir.co-bind & CTCF & no & no \\ CTCF & NFYa & neg & indir.co-bind & CTCF & yes & no \\ CTCF & NRF1 & neg & indir.co-bind & CTCF & yes & no \\ CTCF & NRSF & neg & indir.co-bind & CTCF & yes & no \\ CTCF & PAX5 & neg & indir.co-bind & both & yes & no \\ CTCF & POU2f & neg & indir.co-bind & POU2f & yes & no \\ CTCF & RUNX3 & neg & indir.co-bind & both & no & no \\ CTCF & SRF & neg & indir.co-bind & CTCF & yes & no \\ CTCF & USF1 & neg & indir.co-bind & both & yes & no \\ CTCF & YY1 & neg & indir.co-bind & CTCF & yes & yes\\ CTCF & ZNF143 & neg & indir.co-bind & CTCF & yes & no \\ \hline JunD & BHLHE40 & neg & indir.co-bind & BHLHE40 & yes & no \\ JunD & CTCF & neg & indir.co-bind & CTCF & yes & no \\ JunD & EBF1 & neg & indir.co-bind & EBF1 & yes & no \\ JunD & EGR1 & neg & indir.co-bind & EGR1 & yes & yes\\ JunD & ELK1 & neg & unknown & & no & no \\ JunD & IRF4 & neg & indir.co-bind & JunD & yes & yes\\ JunD & 
MAZ & neg & indir.co-bind & MAZ & no & no \\ JunD & PAX5 & neg & indir.co-bind & PAX5 & yes & no \\ JunD & SP1 & neg & indir.co-bind & SP1 & yes & yes\\ JunD & USF2 & neg & indir.co-bind & USF2 & yes & no \\ JunD & YY1 & neg & indir.co-bind & & yes & yes\\ JunD & ZBTB33 & neg & unknown & & yes & no \\ \hline \end{tabular} \captionof{table} { \textbf{Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep{chatr-aryamontri_biogrid_2017}.} \label{encode_peaks_association_table} \end{center} \end{table} The study of co-binding with CTCF showed that it was possible to detect global associations. I already detected that the cohesin complex members SMC3 and RAD21 form a complex with CTCF, as expected from literature [REFERENCE NEEDED]. Additionally, I detected that YY1 and ZNF143 are also frequently associated with CTCF. Thus, I decided to push forward in this direction. To this end, I set up a method based on motif co-occurrence to i) relieve the necessity of observing similar chromatin architectures, as in the previous section and ii) be able to functionally characterize the detected interactions. Several types of functional associations can occur between a TF$_{A}$ and a TF$_{B}$. 
Because each one of them brings different expected patterns in the data, it should be possible to detect and disentangle them. First, two TFs can dimerize and bind to DNA using both DNA binding domains (DBDs) [REFERENCE NEEDED] (Figure \ref{encode_peaks_tf_association}A). I will refer to this case as \textbf{direct co-binding}. If this happens, both TF motifs are expected to appear in close vicinity, more often than by chance. Moreover, a spatial constraint (both spacing and orientation) reflecting the complex structure is also expected to occur. Second, two TFs can dimerize and bind to DNA using only one of the DBDs. This will result in having one of the TFs bound to DNA while the other one will tether DNA through its interaction with the other TF (Figure \ref{encode_peaks_tf_association}B). This case will be referred to as \textbf{indirect co-binding}. In such a case, if TF$_{A}$ is the factor binding its motif and TF$_{B}$ is the tethering factor, both motifs are expected to repel (avoid) each other at TF$_{A}$ binding sites. Third, two TFs can both bind DNA using their own DBDs, in close vicinity but without any physical interaction (Figure \ref{encode_peaks_tf_association}C). In such a case, both motif$_{A}$ and motif$_{B}$ are expected to be enriched at both TF$_{A}$ and TF$_{B}$ binding sites. However, no spatial constraint is expected between the motifs. This case will be referred to as \textbf{independent co-binding}. This can be caused by a temporal relationship between both TFs where both TFs can bind to a given region asynchronously. For instance, a first TF is recruited to its binding site and ensures - somehow - a proper chromatin environment for another TF, such as illustrated during macrophage and B cells progenitors commitment \citep{heinz_simple_2010}. Finally, in case of a partial or total motif overlap, both TFs may be observed to be bound together (Figure \ref{encode_peaks_tf_association}D).
In such a case, different phenomena may explain this observation. A first possible explanation would be that two TFs compete to bind to the same region. Observing both TFs bound together could be due to an overlap of data from different cells in which only one TF is bound at a time. A second possible explanation would be that, for some reason, only one TF is bound, never the other. However, I prefer to be cautious regarding the causal mechanisms and this case will be referred to as an \textbf{interference}. In order to collect more evidence about functional connections between TFs, I developed a simple analysis pipeline able to detect the expected patterns of motifs described above. Briefly, given a set of binding sites for a TF$_{A}$, it is possible to construct a contingency matrix containing the number of binding sites with i) motif$_{A}$ and motif$_{B}$, ii) motif$_{A}$ only, iii) motif$_{B}$ only or iv) no motif and assess whether both motifs are associated or avoid each other using Fisher's exact test. Then, for pairs of motifs showing an association, displaying the spatial distribution of the motif may help to discriminate whether or not there is a spacing constraint or a motif overlap. I investigated the association of 47 TFs for which 53 datasets were available in GM12878 cells with CTCF or JunD. CTCF was chosen because i) most of its binding sites have a short nucleosome depleted region and show only a peak of sequence conservation at the binding site leaving a restricted space for other motifs to co-occur (Figure \ref{suppl_encode_peaks_em_ctcf}) and ii) I already collected several observations regarding CTCF.
JunD was chosen as a complementary example to CTCF in the sense that i) contrarily to CTCF, it is only a transcriptional regulator, ii) it is expected to bind to regulatory regions mostly thus to open chromatin regions where other motifs are expected to co-occur, iii) \url{~50}\% of the peaks have a motif versus \url{~80}\% to \url{~90}\% for CTCF peaklists (Figure \ref{encode_peaks_gm12878_motif_prop}). % motif co occurence Motif co-occurrence analysis suggested several interactions. Regarding CTCF motif (Figure \ref{encode_peaks_ctcf_association}A), 8 positive motif associations (ATF2, EBF1, MAZ, NFYb, NFkB, PAX5, SP1, YY1) and 16 negative motif associations (BATF, ELF1, IRF4, MEF2a, MEF2c, NFATc, NFYa, NRF1, NRSF/REST, PAX5, POU2F2/OCT2, RUNX3, SRF, USF1, YY1 and ZNF143) with other motifs were found. Regarding JunD (Figure \ref{suppl_encode_peaks_jund_association}A), positive motif associations with 2 other TF motifs (BATF, cFos) and 13 negative associations with other TF motifs (ATF2, BHLHE40, CTCF, EBF1, EGR1, ELK1, IRF4, MAZ, PAX5, SP1, USF2, YY1 and ZBTB33) were found. cFos and one of the YY1-Sydh peaklists displayed evidence of poor quality (not shown and annotated as such by the ENCODE Consortium). Additionally, ATF2 is an AP1 member whose motif possesses a 2bp spacer (TGANNTCA) while the JunD motif has a 1bp spacer (TGANTCA). Thus the strong negative interaction may simply be due to the fact that both motifs are mutually exclusive. In consequence, the positive associations CTCF-YY1 and JunD-cFos and the negative association JunD-ATF2 should be ignored. Additionally, JunD and BATF motifs are the same as both these TFs belong to the AP1 family. In consequence, it is impossible to say whether BATF peaks harbour a JunD or a BATF site. Thus this association should be ignored as well, leaving no positive association with JunD motif.
% densities The analysis of CTCF and JunD motif occurrence densities (Figures \ref{encode_peaks_ctcf_association}B and C and Figure \ref{suppl_encode_peaks_jund_association}B and C) revealed further interesting details regarding possible association mechanisms. First, positive associations showed CTCF density patterns mostly compatible with the direct co-binding and the independent co-binding scenarios (see Figure \ref{encode_peaks_ctcf_association}B). However, making a clear distinction between both is often impossible. For instance, both EBF1 peaklists show a decrease in CTCF motif density \url{~10}bp after the peak followed by an increase which could represent the spacer between CTCF and EBF1. However this is followed by a rather wide CTCF motif presence, mostly suggesting an independent co-binding scenario. An interesting candidate for a direct co-binding with CTCF is RXRa (Figure \ref{encode_peaks_ctcf_association}B). Even though the motif association was not significant, a focused co-localization of both motifs appears. Second, negative associations showed CTCF and JunD density patterns compatible with the indirect co-binding scenario where the TFs would tether through CTCF or JunD, i.e. the CTCF or JunD motifs do not show a spacing constraint with the binding sites but are rather spread over \url{~100}bp around binding sites without their own motif (Figure \ref{encode_peaks_ctcf_association}C and Figure \ref{suppl_encode_peaks_jund_association}C). Interestingly, CTCF motif around YY1 and ZNF143 binding sites lacking their own motifs (see bottom of Figure \ref{encode_peaks_ctcf_association}C) showed really focused densities, indicating that for some reason, the CTCF motif is well localized. Even if unexpected, this observation is not incompatible with the indirect co-binding scenario and further supports the results from section \ref{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}.
% results To summarize, the motif association statistics allowed me to identify 35 associations of TFs with either CTCF or JunD (Table \ref{encode_peaks_association_table}). The strongest negative interactions for CTCF were ZNF143 and YY1, supporting the results found in the previous sections. The analysis of CTCF and JunD motif spatial distributions around peaks and a closer examination of the contingency matrices allowed to suggest details about the interacting mechanisms, including which TF binds DNA. The only two exceptions were JunD-ELK1 and JunD-ZBTB33 for which the motif occurrence densities were uninformative. Finally, out of these 35 associations, 5 were supported by experimental evidences and 5 were not already reported in previous studies or databases \citep{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012, chatr-aryamontri_biogrid_2017}. \section{EBF1 binds nucleosomes} \begin{figure} \begin{center} \includegraphics[scale=0.4]{images/ch_encode_peaks/ebf1_haib_1.png} \captionof{figure}{\textbf{EBF1 binding sites} stand on the edge of a nucleosome. \textbf{A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep{gaffney_controls_2012}. \textbf{B} Dinucleotide frequencies around the dyads of the nucleosomes that have an EBF1 binding site within 100bp. \textbf{C} Motif frequency around the dyads of nucleosomes that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.} \label{encode_peaks_ebf1} \end{center} \end{figure} % As presented above (section \ref{encode_peaks_chippartitioning}), EBF1 binding sites does not seem to present a NDR seem to be covered by a nucleosome array. 
This observation suggest that EBF1 can bind to nucleosomal DNA. However, because ChIPPartitioning realigns the data, one possible explanation is that it failed to properly aligned the data and that the results do not reflect reality. % In order to clarify this, I looked at the MNase digestion profile - more specifically, at the distribution of nucleosome dyads - at EBF1 binding sites. EBF1 is a crucial factor for B cell development. It is necessary in the early steps, for a proper lineage commitment as well as later on during the entire B cell development \citep{boller_defining_2018}. For many years, EBF1 has been thought to be able to "pioneer early changes in the target gene chromatin necessary for transcriptional activation" and proper B cell development \citep{hagman_early_2005}. Experimental evidence supported that EBF1 could be able to bind compacted naive chromatin (without noticeable mark/modification), leading to a local chromatin opening, H3K4me2 deposition, DNA demethylation and gene activation \citep{maier_early_2004,boller_pioneering_2016}. If such features make a lot of sense during lineage commitment, some of the underlying mechanisms remained mysterious, especially how EBF1 primarily binds to closed chromatin. With regard to this, the results of section \ref{encode_peaks_chippartitioning}, suggesting that EBF1 binding sites may be covered by nucleosome arrays, caught my attention. In order to collect evidence that may shed light on this, I conducted a deeper exploration of the EBF1 binding sites. First, the distribution of nucleosome dyads - from two independent experiments - around EBF1 binding sites revealed a landscape that is compatible with a nucleosome positioned \url{~70}bp apart from the binding sites (Figure \ref{encode_peaks_ebf1}A). This configuration would position the EBF1 binding site at the edge of the nucleosome.
The 10bp periodicity visible suggested that other positioning of the EBF1 binding site exist but always at integer numbers of helix turn, such that the EBF1 binding site would always be positioned the same compared to the nucleosome surface. Surprisingly, the distribution of EBF1 motif remained the same, whether the nucleosome was containing an EBF1 bound site or not (Figure \ref{suppl_encode_peaks_ebf1_nucl}). Second, to support the fact that these EBF1 binding sites are indeed functional sites, I compared some of their chromatin features with the entire nucleosome pool. As expected, the presence of EBF1 binding sites was correlated with an increased accessibility (Figure \ref{suppl_encode_peaks_ebf1_chrom}A), even though the opening was spread rather than narrow. Furthermore, this increased opening was concomitant with an enriched H3K4me2 deposition (Figure \ref{suppl_encode_peaks_ebf1_chrom}B), in line with the literature. Last, it was also possible to highlight a higher sequence conservation at the nucleosome edges when they had an EBF1 binding site (Figure \ref{suppl_encode_peaks_ebf1_chrom}C), suggesting a functional difference between both nucleosome pools. % Finally, Trifonov's motif appeared along the nucleosome, EBF1 motif was rather present at the nucleosome edges. A closer look both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif ({A/C}CCC{A/C} and {A/G}GGG{A/G}) at the cost of 2 or 0 missmatches. % A further inspection of the dinucleotide base composition in the nucleosome bearing an EBF1 binding site revealed a periodic pattern that is compatible with a rotationally positioned nucleosome (Figure \ref{encode_peaks_ebf1}B), as expected from literature in \citep{ioshikhes_variety_2011,gaffney_controls_2012}. 
% Finally, the occurrence of the nucleosome positioning motif - YRRRRRYYYYYR where Y is C/T and R is A/G - identified by Trifonov \citep{trifonov_cracking_2011} in these nucleosomes is antiphased with the occurrence of the EBF1 motif. If Trifonov's motif appeared along the nucleosome, EBF1 motif was rather present at the nucleosome edges. A closer look both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif ({A/C}CCC{A/C} and {A/G}GGG{A/G}) at the cost of 2 or 0 missmatches. % These results suggest that EBF1 can bind nucleosomal DNA. In most cases, it seems that the EBF1 binding site is located at its edge. Incidentally, the high similarity between Trifonov and EBF1 motifs suggest that EBF1 binding sequence may have a nucleosome positioning property. Interestingly, EBF1 motif, as identified by JASPAR \ref{suppl_encode_peaks_ebf1_logo}, is 14bp wide. Consequently, it is conceivable that, wherever this motif is located along the nucleosome, at least part of remains facing outward and is thus "readable". % Based on this observation, I hypothesize that EBF1 may be a pioneering factor or that it influence nucleosomes positioning through its binding. In the first case, EBF1 would be able to target yet inaccessible loci upon the right cellular conditions. In the second case, EBF1 would rather serve to both open and close targeted sites by leading - directly or indirectly - to the positing of a nucleosome right beside of it binding site. Both scenarios make sense. Indeed, EBF1 is known to be crucial for B-cells commitment. In such developmental processes, specific enhancers are made accessible and active at different, in a coordinated manner, during the developmental process. (AND WHAT ABOUT CLOSING???) 
Third, a further inspection of the sequence composition of the nucleosomes bearing an EBF1 binding site revealed i) a periodic occurrence of antiphased WW (W=A/T) and SS (S=C/G) dinucleotides and ii) a periodic occurrence of the YRRRRRYYYYYR (R=A/G, Y=C/T) nucleosome positioning motif described by Trifonov \citep{trifonov_cracking_2011}. Together, these observations suggest that EBF1 binding sites are located on the edge of a rotationally positioned nucleosome \citep{ioshikhes_variety_2011,trifonov_cracking_2011,gaffney_controls_2012}. Interestingly, Trifonov's motif appeared in counter-phase with the EBF1 motif. A closer look at both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif (\{A/C\}CCC\{A/C\} or \{A/G\}GGG\{A/G\}) at the cost of 2 or 0 mismatches. These results suggest that EBF1 can indeed bind nucleosomal DNA. The bound motifs were predominantly located at the edges of the nucleosomes. Yet, this was also the case for nucleosomes that are not bound by EBF1. This suggests that nucleosomes are already in this position before EBF1 binding, which may be the case given the presence of favorable nucleosome positioning sequences. The reason why the EBF1 motif is already on the edges of nucleosomes, even without EBF1 binding, remains unknown. One explanation could be that such sites have a double function. The first function would be to recruit EBF1 to open up the region. The second would be that the EBF1 binding sequence (together with other positioning sequences) can act as a barrier - a potential well - preventing the nucleosome from rolling over in this direction. Such a system would have the advantage of promoting a suited chromatin structure in developmentally important regions. Constraining nucleosome movement could serve to hide regulatory elements.
At the same time, these regions would remain responsive to differentiation signals through the exposition of EBF1 sites on the periphery of nucleosomes. \section{Methods} \subsection{Data and data processing} \label{encode_peaks_methods_data} All the GM12878 ENCODE data used were mapped against hg19 genome and can be found on the MGA repository \citep{dreos_mga_2018}. Peaks called by the ENCODE Consortium using their uniform processing pipeline \cite{gerstein_architecture_2012} were used. These peaks can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Uniform-TFBS/Uniform-TFBS.html}. Assuming that a TF binds to DNA through motif recognition, the peak center should be localized on the motif center. Thus the center of each peak was moved to the closest motif instance within 60bp. To do so, each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \cite{mathelier_jaspar_2014}, HOCOMOCO v10 \cite{kulakovskiy_hocomoco:_2016} or Jolma \cite{jolma_dna-binding_2013} collection. Using the corresponding log-odd PWM, peak sequences were scanned to find motif instance with a score corresponding to a pvalue higher or equal to 1e-4. If such a motif instance was found, the peak position was shifted to the center of the motif instance and mapped to the corresponding strand. Otherwise, the peak position remained unchanged without strand information. In GM12878 cells, nucleosome occupancy was assessed using MNase-seq data released by the ENCODE Consortium (GSE35586). These data can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/GSE35586/GSE35586.html}. To increase sequencing depth, all replicates available for this cell line were pooled together, resulting in ~789 mio reads, and used as a single dataset. The resulting dataset is available and has the description "GM12878|Nucleosome|all (SLOW!)". 
Because each read was represented as a single point coordinate corresponding to its 5' edge, these coordinates were centered by 70bp in order to indicate the nucleosome dyads. Finally, another dataset was used for one analysis only. These data were released by Gaffney and colleagues \cite{gaffney_controls_2012} and can be found at \url{https://ccg.epfl.ch/mga/hg19/gaffney12/gaffney12.html} and were not centered as the coordinates already represent the center of paired-end sequenced fragments. The dataset is labeled "All Paired-end samples - 147bp fragments". Chromatin accessibility was assessed using DNaseI-seq data released by the ENCODE Consortium \cite{boyle_high-resolution_2008} (GSE32970). To increase sequencing depth, all replicates available for GM12878 cells were pooled together, resulting in \url{~144} mio reads, and used as a single dataset. The individual replicates can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Duke-DNaseI-HS/Duke-DNaseI-HS.html}. The reads were represented as a single point coordinate corresponding to their 5' edges but were not centered as this corresponds to the exact DNaseI nick location. The EPDnew release 003 was used as TSS annotation \cite{dreos_eukaryotic_2017} and genome sequence conservation was assessed using Phastcons \cite{siepel_evolutionarily_2005}. Both datasets can be found at \url{https://ccg.epfl.ch/mga/hg19/epd/epd.html} and \url{https://ccg.epfl.ch/mga/hg19/phastcons/phastcons.html} respectively. \subsection{Classification of MNase patterns} \label{encode_peaks_em_mnase} For each TF peaklist, MNase, DNase, sequence conservation and TSS density around TF binding sites were assessed independently by counting the number of reads mapped from -999bp to +1000bp around each peak, using 10bp bins. For each TF, 4 matrices having one row per binding site (peak) and 199 columns were created using the ChIP-extract program \citep{ambrosini_chip-seq_2016}.
Probabilistic pattern classification was achieved using ChIPPartitioning (see section \ref{encode_peaks_chippartitioning}). The algorithm was implemented as described in the supplemental materials of \cite{nair_probabilistic_2014}. Two different procedures were used to classify MNase patterns. Both were run for 10 iterations allowing flip and a value of shift of 15 bins. The first procedure aimed to discover 4 different pattern classes, allowing flip and a shift of 15 bins. The procedure was initialized with 4 classes. The class patterns were initialized by assigning each peak a random probability to belong to each of the 4 classes. The patterns were then computed as the weighted average of the signal given the peak class probabilities as weights. Then the prior class probabilities were initialized as $p_{k,s,f} = 1/(K \times S \times 2)$ where $k$ is the class index, $s$ is the shift value in bins (here 15), $f$ is an indicator variable for the flip state (1 for "normal", 2 for "reverse"), $K$ is the number of classes (here 4) and $S$ is the maximum allowed shift in bins. The classification was run for 10 iterations. At the end, it returned a matrix of dimensions $N \times K \times S \times 2$ containing the probabilities for each of the $N$ regions to belong to each of the $K$ classes, for each possible shift state $S$ and for both flip states ("normal" or "reversed"). The second procedure aimed to discriminate between 2 classes : i) the binding sites describing the "average" binding sites as opposed to ii) those differing from this. To do so, class patterns were initialized to i) the aggregation over all peaks (the average pattern) and ii) a flat pattern being the mean number of counts of the input matrix. Flip and 15 bins of shift were allowed. The prior class probabilities were initialized as $p_{k,s,f} = \mathcal{N}(s,floor(S/2)+1,1)$ where the second and third parameters are the mean and the standard deviation, giving a higher prior probability to states with shift equal to 0bp.
\subsection{Quantifying nucleosome array intensity from classification results} Nucleosome array intensity was quantified using a method developed by Zhang and colleagues \citep{zhang_canonical_2014}. Briefly, nucleosome signal is represented in 2 dimensions as a set of signal intensities for a given set of positions. Data are structured as a vector $Y$ containing the nucleosome occupancy signal (for instance an EM classification class profile) for $n$ bins (for EM class profiles, 199 bins of 10bp). First, the 1$^{st}$ order derivative $D_{1}$ of $Y$ is computed. Then the 1$^{st}$ order derivative $D_{2}$ of the absolute value of $D_{1}$ is computed. Local maxima in $D_{2}$ are searched using a window of 15 bins (corresponding to 150bp, a nucleosome width). Maxima can be interpreted as strong drops or enrichments of signal, corresponding to a pattern expected from a well positioned nucleosome array. Finally, all $D_{2}$ maxima are joined by a line and the nucleosome array intensity at each given position is the height of the line at this position. The nucleosome array intensity for the first and last positions of $Y$ was set to 0. The average nucleosome array intensity of $Y$ was used as the nucleosome array value of the input data. The classification of a matrix of counts having $N$ rows (regions), with $K$ classes, allowing a maximum of $S$ shift states and two flip states ("normal" and "reverse") outputs a probability matrix $P$ of dimension [$N$, $K$, $S$, 2] containing the probability for each region to belong to each class, given a shift state and a flip state.
This matrix can be used to compute a vector $D_{k}$ of length $S$ containing the probability density of the shift states for a class $k$ using : \begin{equation} \begin{aligned} D_{k,s} & = \frac {\sum_{i=1}^{N} (P_{i,k,s,1} + P_{i,k,s',2})} {\sum_{i=1}^{N} \sum_{s=1}^{S} (P_{i,k,s,1} + P_{i,k,s',2})} \\ \text{with } \\ s' & = S - s + 1 \end{aligned} \label{encode_peaks_equation_shift_density1} \end{equation} \citep{ambrosini_chip-seq_2016} where $s'$ represents the index of the reverse orientation and with the constraint that all the elements of $P$ sum to 1. Given the shift probability density vector $D_{k}$ of one class, computing its standard deviation was done using : \begin{equation} \begin{aligned} \sigma_{k} & = \sqrt { \sum_{i=1}^{S} (X_{i}^{2} \cdot D_{k,i}) - \mu_{k}^{2} }\\ \text{with } \\ \mu_{k} & = \sum_{i=1}^{S} (X_{i} \cdot D_{k,i}) \end{aligned} \label{encode_peaks_equation_shift_density2} \end{equation} where $X$ is a vector containing the position changes in bp for every shift state, e.g. for a maximum number of shift states of 15 ($S=15$) with bins of 10bp, $X$ would contain [-70, -60, ..., 0, ..., +60, +70]. \subsection{Peak colocalization} To measure the extent of colocalization between CTCF, YY1, ZNF143, SMC3 and RAD21, the occurrence of YY1, ZNF143, SMC3 and RAD21 peaks around CTCF peaks was computed using ChIP-extract \citep{ambrosini_chip-seq_2016}. The CTCF peak list used as reference was "wgEncodeAwgTfbsSydhGm12878Ctcfsc15914sc20UniPk" because it was the CTCF peak list containing i) the most CTCF peaks and ii) the highest proportion of peaks with a motif. ChIP-extract was run separately for YY1, ZNF143, SMC3 and RAD21 using the following parameters : from -99, to 100, window size 1. Then, the proportion of CTCF peaks having at least one other peak within +/-10 bp, 50bp or 100bp was computed.
\subsection{NDR detection} Let us consider a matrix of MNase-seq counts $R$ of dimensions $N \times L$ containing $N$ vectors of read counts $r_{1}, r_{2}, ..., r_{N}$ of length $L$. Because MNase-seq reads are a direct indication of the nucleosome occupancy, detecting NDRs is about finding low signal regions, flanked by two high signal regions. The signal in each vector $r_{i}$ (region) is assumed to have been sampled from a 2 class mixture of high (nucleosome) and low (nucleosome-free) signal, using a Poisson distribution. Both classes are expected to occur with a given probability $p^{nucl}_{i}$ and $p^{free}_{i}$. The rows are considered individually to lessen technical biases such as region specific sequencing depth. The class probabilities and their mean parameters are estimated using an EM algorithm. First, during the E-step, for each position inside a region, the posterior probability of the nucleosome class given the data is computed using : \begin{equation} \begin{aligned} P(nucl | r_{i,l}) = \frac{p_{i}^{nucl} \times Poisson(r_{i,l}, \lambda=m_{i}^{nucl})} {p_{i}^{nucl} \times Poisson(r_{i,l}, \lambda=m_{i}^{nucl}) + p_{i}^{free} \times Poisson(r_{i,l}, \lambda=m_{i}^{free})} \end{aligned} \end{equation} where $r_{i,l}$ is the number of reads at position $l$ in the $i$-th row of $R$, $m_{i}^{nucl}$ and $m_{i}^{free}$ are the mean parameters of the nucleosome and nucleosome-free classes respectively.
Obviously, the nucleosome-free class posterior probability is \begin{equation} \begin{aligned} P(free | r_{i,l}) = 1 - P(nucl | r_{i,l}) \end{aligned} \end{equation} Then, during the M-step, the class mean parameters are updated using \begin{equation} \begin{aligned} m_{i}^{nucl} = & \sum_{l=1}^{L} r_{i,l} \times P(nucl | r_{i,l}) \\ m_{i}^{free} = & \sum_{l=1}^{L} r_{i,l} \times P(free | r_{i,l}) \end{aligned} \end{equation} and the class probabilities : \begin{equation} \begin{aligned} p_{i}^{nucl} = & \frac{1} {L} \times \sum_{l=1}^{L} P(nucl | r_{i,l}) \\ p_{i}^{free} = & 1 - p_{i}^{nucl} \end{aligned} \end{equation} The EM optimization of the parameter estimates was repeated for 10 iterations. At the end of the parameter estimation process, each of the $L$ positions in a region $R_{i}$ was assigned two posterior probabilities $P(nucl | r_{i,l})$ and $P(free | r_{i,l})$ to belong to each class. In all cases, the nucleosome class was the class having the highest mean parameter and the nucleosome-free class the class with the smallest ($m_{i}^{nucl} > m_{i}^{free}$). The binding sites - located in the center of the regions, at position $s = L/2$ - were assumed to be within the NDR.
From that point, the NDR was extended using the following procedure : \SetKwProg{Fn}{}{\{}{}\SetKwFunction{Function}{float NDRextend}% \begin{algorithm}[H] \Fn{\Function{}} { \KwData{The posterior probabilities obtained for each position of $r_{i}$.} \KwResult{The left and right coordinates of the NDR} \tcp{NDR only covers the central location} $left = s$ \; $right = s$ \; \While{$left \ne 2$ and $right \ne L-1$} { $p.free.l = P(free|r_{i,left})$ \; $p.free.r = P(free|r_{i,right})$ \; $p.nucl.l = P(nucl|r_{i,left})$ \; $p.nucl.r = P(nucl|r_{i,right})$ \; \tcp{bidirectional extension} \If{$p.free.l > p.nucl.l$ and $p.free.r > p.nucl.r$} { $left \minuseq 1$ \; $right \pluseq 1$ \; } \tcp{extension to left} \ElseIf{$p.free.l > p.nucl.l$} { $left \minuseq 1$ \; } \tcp{extension to right} \ElseIf{$p.free.r > p.nucl.r$} { $right \pluseq 1$ \; } \tcp{no more extension possible} \Else { break \; } } \Return{$left$, $right$} } \caption{Searches the coordinates of the NDR using the posterior nucleosome and nucleosome free class probabilities, for a region $R_i$, from its central position.} \label{encode_peaks_algo_ndr_extend} \end{algorithm} The nucleosome occupancy around CTCF binding sites was measured using ChIP-extract with "wgEncodeAwgTfbsSydhGm12878Ctcfsc15914sc20UniPk" peak list as reference - because it was the CTCF peak list with the most peaks and with the highest proportion of peaks with a CTCF motif -, the ENCODE MNase-seq data described in section \ref{encode_peaks_methods_data} as targets and the following parameters : from -999bp, to 1000bp and window size 10bp. This matrix was subjected to ChIPPartitioning, as described in section \ref{encode_peaks_em_mnase}, to find 4 nucleosome architectures, using shifting and flipping. The resulting posterior probabilities were used to re-orient the data.
If the major flip state - that is the flip state with the highest overall probability - for a given region was the "reverse" state, then the row was reversed. The re-oriented matrix was then subjected to the NDR detection. The re-orientation was done for aesthetic purposes only. Because the NDR detection was performed starting from the center position in each region - and given that reversing a vector did not change its central position - this operation had no influence on the NDR detection. \subsection{CTCF and JunD interactors} % Enumerating motif instances genome-wide To enumerate instances of CTCF and JunD motifs, the hg19 genome assembly was scanned using CTCF (MA0139.1 from JASPAR Core Vertebrate 2014 \citep{mathelier_jaspar_2014}) and JunD (JUND\_HUMAN.H10MO.A from HOCOMOCOv10 \citep{kulakovskiy_hocomoco:_2016}) matrices to produce lists of potential binding sites. A limit score threshold was set as the score corresponding to a p-value of 1e-5 for each matrix, respectively. This was done using the matrix\_scan program from PWMScan \citep{ambrosini_pwmscan:_2018}. Finally, any motif instance falling inside a region classified as being a repeated element and blacklisted by the ENCODE Consortium was filtered out using the count\_filter program from the ChIP-seq tools \citep{ambrosini_chip-seq_2016-1}. % Measuring motif instance occurrence near peaks Then, for each TF peak list independently, the number of i) the TF and ii) CTCF/JunD instances +/- 1kb of each peak was measured, in bins of 1bp, using the ChIP-extract program from the ChIP-seq tools \citep{ambrosini_chip-seq_2016-1}. The associations were measured as follows : using the ChIP-extract results for the given peak list versus i) the TF and ii) CTCF/JunD motif instances, the numbers of peaks having i) at least one TF and one CTCF/JunD motif instance, ii) only TF motif instances, iii) only CTCF/JunD motif instances or iv) no motif instance were counted.
These numbers were used to build a contingency table and a two-sided Fisher exact test for association was performed. The motif relationship was considered a significant association if the test OR was bigger than 1 and the 95\% CI of the OR did not contain 1, or a significant motif exclusion if the OR was smaller than 1 and the 95\% CI of the OR did not contain 1. % Motif density around peaks The motif occurrence densities were computed from the ChIP-extract result matrices. Out of each matrix, a vector containing the number of motif instances at each possible absolute distance was computed. This was done as follows : first, the neighbours of each non-null cell were incremented (+/- 5 columns on each side) to turn motif instance hits into a non point-like representation. A given cell value could be incremented several times. Second, for each row, the columns corresponding to the same absolute distance from the peak were summed together (e.g. +1bp with -1bp, +2bp with -2bp, +999bp with -999bp). The first column of the resulting matrix should contain the number of motif instances present at the peak center (distance of 0bp), the second column at an absolute distance of 1bp and so on. Finally, the rows were summed up and the resulting vector was considered as the motif density vector for the given peak list. The vectors were used to create a matrix for the CTCF motif and the JunD motif (a vector corresponds to a row), separately, and the matrix was displayed as a heatmap. The row values were standardized and the rows hierarchically clustered using the Euclidean distance. \subsection{EBF1 and nucleosome} The correlation between EBF1 binding sites and nucleosome dyads was made using ChIP-cor \citep{ambrosini_chip-seq_2016-1}, from the web (\url{https://ccg.epfl.ch/chipseq/chip_cor.php}).
The references were the corrected EBF1 peaks (wgEncodeAwgTfbsHaibGm12878Ebf1sc137065Pcr1xUniPk dataset, for more details see section \ref{encode_peaks_methods_data}) and the targets either i) the MNase-seq data released by Gaffney et al. \citep{gaffney_controls_2012} (hg19 / DNase FAIRE etc / Gaffney 2012 ... / All Paired-end samples - 147bp fragments) or ii) the ENCODE MNase-seq data (hg19 / ENCODE DNase FAIRE etc / GSE35586 ... / GM12878 Nucleosome all (SLOW!)). In both cases, "any" strand was selected. Because Gaffney data are paired-ended and represent the fragment midpoint (the dyad), no centering was done. The ENCODE data are single-ended and a centering of 70bp (half a nucleosome) was applied to approximate the fragment midpoint. The count cut-off was set to 1 and the range to -399 to +400bp. To isolate nucleosomes with an EBF1 binding site, the opposite ChIP-cor analysis was run : Gaffney data as references versus EBF1 binding sites as targets with count cut-off set to 1 and the range to -399 to +400bp. In the results page the "Feature Selection Tool" was used to select dyads with at least 1 EBF1 binding site (threshold parameter) located "From" -99bp "To" 100bp. The count cut-off was set to 9999 and both "Switch to depleted feature" and "Reference feature oriented" set to "Off". These nucleosome dyads were uploaded to OProf (\url{https://ccg.epfl.ch/ssa/oprof.php}) on the SSA server \citep{ambrosini_signal_2003}. Four individual analyses were run to measure the "WW", "SS", "YRRRRRYYYYYR" and EBF1 motif occurrences. In all cases, the 5' and 3' borders were set to -399bp and 400bp, the window shift to 1bp and the search mode to "bidirectional". For "SS" and "WW", the motif to search was entered as a "Consensus sequence", the window size was set to 2bp, the reference position to 1 and the number of allowed mismatches to 0. 
For "YRRRRRYYYYYR", the motif was also entered as a "Consensus sequence", the window size was set to 12bp, the reference position to 6 and the number of allowed mismatches to 4. For the EBF1 motif, the JASPAR CORE Vertebrate 2018 "EBF1 MA0154.3 (length=14)" was used with a window size of 14bp, a reference position of 7 and a p-value threshold of 1e-4. To investigate the chromatin architecture around nucleosome dyads, ChIP-cor was used. Two references were used : i) the nucleosomes with an EBF1 binding site (see above) and ii) the entire Gaffney dataset (hg19 / DNase FAIRE etc / Gaffney 2012 ... / All Paired-end samples - 147bp fragments). For each reference, three analyses were run against different target features : i) DNase-seq data to monitor chromatin accessibility (hg19 / ENCODE DNase FAIRE etc / Boyle 2008 ... DNaseI HS - GM12878 - Rep 1) with "any" strand and no centering, ii) H3K4me2 ChIP-seq data (hg19 / ENCODE ChIP-seq / GSE29611 ... / GM12878 H3k4me2) with "any" strand and a centering of 70bp (half the nucleosome) and iii) positional sequence conservation scores (hg19 / Sequence derived / Vertebrate Conservation (phastCons46way) ... / PHASTCONS VERT46) with "any" strand and no centering. For DNase-seq and sequence conservation, the range was set to -399bp to 400bp with a window width of 1bp. For H3K4me2 data, the range was set to -3999bp to 4000bp with a window width of 10bp. For the DNase-seq and the H3K4me2 data, the count cut-offs were set to 1, for the sequence conservation to 9999.
diff --git a/my_thesis.aux b/my_thesis.aux index 56a96e2..1adac36 100644 --- a/my_thesis.aux +++ b/my_thesis.aux @@ -1,152 +1,150 @@ \relax \providecommand\hyper@newdestlabel[2]{} \providecommand\BKM@entry[2]{} \catcode `:\active \catcode `;\active \catcode `!\active \catcode `?\active \catcode `"\active \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined \global\let\oldcontentsline\contentsline \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} \global\let\oldnewlabel\newlabel \gdef\newlabel#1#2{\newlabelxx{#1}#2} \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} \AtEndDocument{\ifx\hyper@anchor\@undefined \let\contentsline\oldcontentsline \let\newlabel\oldnewlabel \fi} \fi} \global\let\hyper@last\relax \gdef\HyperFirstAtBeginDocument#1{#1} \providecommand\HyField@AuxAddToFields[1]{} \providecommand\HyField@AuxAddToCoFields[2]{} \providecommand \oddpage@label [2]{} \babel@aux{english}{} \babel@aux{french}{} \babel@aux{english}{} \@input{head/dedication.aux} \@input{head/acknowledgements.aux} \BKM@entry{id=1,dest={636861707465722A2E31},srcline={3}}{41636B6E6F776C656467656D656E7473} \pgfsyspdfmark {pgfid2}{0}{40463552} \pgfsyspdfmark {pgfid1}{6}{40498788} \@input{head/preface.aux} \BKM@entry{id=2,dest={636861707465722A2E32},srcline={4}}{50726566616365} \pgfsyspdfmark {pgfid4}{0}{40463552} \pgfsyspdfmark {pgfid3}{6}{40498788} \@input{head/abstracts.aux} \BKM@entry{id=3,dest={636861707465722A2E33},srcline={9}}{4162737472616374205C28456E676C6973682F4672616E5C3334376169732F446575747363685C29} \pgfsyspdfmark {pgfid6}{0}{40463552} \pgfsyspdfmark {pgfid5}{6}{40498788} \pgfsyspdfmark {pgfid8}{0}{40463552} \pgfsyspdfmark {pgfid7}{6}{40498788} \pgfsyspdfmark {pgfid10}{0}{40463552} \pgfsyspdfmark {pgfid9}{6}{40498788} \BKM@entry{id=4,dest={746F632E30},srcline={30}}{436F6E74656E7473} \pgfsyspdfmark {pgfid12}{0}{40463552} \pgfsyspdfmark {pgfid11}{6}{40498788} \@input{main/ch_introduction.aux} 
\BKM@entry{id=5,dest={636861707465722A2E37},srcline={5}}{496E74726F64756374696F6E} \pgfsyspdfmark {pgfid14}{0}{40463552} \pgfsyspdfmark {pgfid13}{6}{40498788} \@input{main/ch_group_projects.aux} \BKM@entry{id=6,dest={636861707465722E31},srcline={2}}{5075626C6973686564206C61626F7261746F72792070726F6A65637473} \BKM@entry{id=7,dest={636861707465722E31},srcline={5}}{5075626C6973686564206C61626F7261746F72792070726F6A65637473} \BKM@entry{id=8,dest={73656374696F6E2E312E31},srcline={12}}{4D6173732047656E6F6D6520416E6E6F746174696F6E207265706F7369746F7279} \BKM@entry{id=9,dest={73756273656374696F6E2E312E312E31},srcline={17}}{496E74726F64756374696F6E} \BKM@entry{id=10,dest={73756273656374696F6E2E312E312E32},srcline={23}}{4D474120636F6E74656E7420616E64206F7267616E697A6174696F6E} \pgfsyspdfmark {pgfid16}{0}{40463552} \pgfsyspdfmark {pgfid15}{6}{40511883} \BKM@entry{id=11,dest={73756273656374696F6E2E312E312E33},srcline={58}}{436F6E636C7573696F6E73} \BKM@entry{id=12,dest={73656374696F6E2E312E32},srcline={64}}{45756B6172796F7469632050726F6D6F746572204461746162617365} \BKM@entry{id=13,dest={73756273656374696F6E2E312E322E31},srcline={68}}{496E74726F64756374696F6E} \BKM@entry{id=14,dest={73756273656374696F6E2E312E322E32},srcline={86}}{4550446E6577206E6F7720616E6E6F7461746573205C28736F6D65206F665C2920796F7572206D757368726F6F6D7320616E6420766567657461626C6573} \BKM@entry{id=15,dest={73756273656374696F6E2E312E322E33},srcline={120}}{496E63726561736564206D617070696E6720707265636973696F6E20696E2068756D616E} \BKM@entry{id=16,dest={73756273656374696F6E2E312E322E34},srcline={132}}{496E746567726174696F6E206F66204550446E65772077697468206F74686572207265736F7572636573} \BKM@entry{id=17,dest={73756273656374696F6E2E312E322E35},srcline={138}}{436F6E636C7573696F6E73} \BKM@entry{id=18,dest={73756273656374696F6E2E312E322E36},srcline={142}}{4D6574686F6473} \BKM@entry{id=19,dest={73656374696F6E2E312E33},srcline={150}}{50574D5363616E} 
\BKM@entry{id=20,dest={73756273656374696F6E2E312E332E31},srcline={164}}{496E74726F64756374696F6E} \BKM@entry{id=21,dest={73756273656374696F6E2E312E332E32},srcline={191}}{4461746120616E64206D6574686F6473} \BKM@entry{id=22,dest={73756273656374696F6E2E312E332E33},srcline={217}}{42656E63686D61726B} \BKM@entry{id=23,dest={73756273656374696F6E2E312E332E34},srcline={287}}{436F6E636C7573696F6E73} \BKM@entry{id=24,dest={73656374696F6E2E312E34},srcline={297}}{535061722D4B} \BKM@entry{id=25,dest={73756273656374696F6E2E312E342E31},srcline={305}}{496E74726F64756374696F6E} \BKM@entry{id=26,dest={73756273656374696F6E2E312E342E32},srcline={318}}{4D6574686F6473} \BKM@entry{id=27,dest={73756273656374696F6E2E312E342E33},srcline={349}}{526573756C7473} \BKM@entry{id=28,dest={73756273656374696F6E2E312E342E34},srcline={356}}{436F6E636C7573696F6E} \@input{main/ch_encode_peaks.aux} \BKM@entry{id=29,dest={636861707465722E32},srcline={2}}{454E434F4445207065616B7320616E616C79736973} \BKM@entry{id=30,dest={636861707465722E32},srcline={5}}{454E434F4445207065616B7320616E616C79736973} \BKM@entry{id=31,dest={73656374696F6E2E322E31},srcline={22}}{44617461} \pgfsyspdfmark {pgfid18}{0}{40463552} \pgfsyspdfmark {pgfid17}{6}{40511883} \BKM@entry{id=32,dest={73656374696F6E2E322E32},srcline={45}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206368726F6D6174696E2061726368697465637475726573} \BKM@entry{id=33,dest={73756273656374696F6E2E322E322E31},srcline={64}}{44617461207265616C69676E6D656E74} \BKM@entry{id=34,dest={73656374696F6E2E322E33},srcline={76}}{4E75636C656F736F6D65206F7267616E697A6174696F6E2061726F756E64207472616E736372697074696F6E20666163746F722062696E64696E67207369746573} \BKM@entry{id=35,dest={73656374696F6E2E322E34},srcline={103}}{5468652063617365206F6620435443462C2052414432312C20534D43332C2059593120616E64205A4E46313433} \BKM@entry{id=36,dest={73656374696F6E2E322E35},srcline={141}}{4354434620616E64204A756E4420696E7465726163746F6D6573} 
\BKM@entry{id=37,dest={73656374696F6E2E322E36},srcline={230}}{454246312062696E6473206E75636C656F736F6D6573} \BKM@entry{id=38,dest={73656374696F6E2E322E37},srcline={267}}{4D6574686F6473} \BKM@entry{id=39,dest={73756273656374696F6E2E322E372E31},srcline={269}}{4461746120616E6420646174612070726F63657373696E67} \BKM@entry{id=40,dest={73756273656374696F6E2E322E372E32},srcline={282}}{436C617373696669636174696F6E206F66204D4E617365207061747465726E73} \BKM@entry{id=41,dest={73756273656374696F6E2E322E372E33},srcline={295}}{5175616E74696679696E67206E75636C656F736F6D6520617272617920696E74656E736974792066726F6D20636C617373696669636174696F6E20726573756C7473} \BKM@entry{id=42,dest={73756273656374696F6E2E322E372E34},srcline={324}}{5065616B20636F6C6F63616C697A6174696F6E} \BKM@entry{id=43,dest={73756273656374696F6E2E322E372E35},srcline={328}}{4E445220646574656374696F6E} \BKM@entry{id=44,dest={73756273656374696F6E2E322E372E36},srcline={420}}{4354434620616E64204A756E4420696E7465726163746F7273} \BKM@entry{id=45,dest={73756273656374696F6E2E322E372E37},srcline={432}}{4542463120616E64206E75636C656F736F6D65} \@input{main/ch_smile-seq.aux} \BKM@entry{id=46,dest={636861707465722E33},srcline={2}}{534D694C452D736571206461746120616E616C79736973} \BKM@entry{id=47,dest={636861707465722E33},srcline={5}}{534D694C452D736571206461746120616E616C79736973} \BKM@entry{id=48,dest={73756273656374696F6E2E332E302E31},srcline={19}}{496E74726F64756374696F6E} \pgfsyspdfmark {pgfid20}{0}{40463552} \pgfsyspdfmark {pgfid19}{6}{40511883} \BKM@entry{id=49,dest={73756273656374696F6E2E332E302E32},srcline={36}}{48696464656E204D61726B6F76204D6F64656C204D6F74696620646973636F76657279} \BKM@entry{id=50,dest={73756273656374696F6E2E332E302E33},srcline={61}}{42696E64696E67206D6F746966206576616C756174696F6E} \BKM@entry{id=51,dest={73756273656374696F6E2E332E302E34},srcline={115}}{526573756C7473} \BKM@entry{id=52,dest={73756273656374696F6E2E332E302E35},srcline={133}}{436F6E636C7573696F6E73} \@input{main/ch_atac-seq.aux} 
\BKM@entry{id=53,dest={636861707465722E34},srcline={2}}{4368726F6D6174696E206163636573736962696C697479206F66206D6F6E6F6379746573} -\BKM@entry{id=54,dest={73656374696F6E2E342E31},srcline={14}}{415441432D736571} +\BKM@entry{id=54,dest={73656374696F6E2E342E31},srcline={16}}{415441432D736571} \pgfsyspdfmark {pgfid22}{0}{40463552} \pgfsyspdfmark {pgfid21}{6}{40511883} -\BKM@entry{id=55,dest={73656374696F6E2E342E32},srcline={31}}{4D6F6E69746F72696E672054462062696E64696E67} -\BKM@entry{id=56,dest={73656374696F6E2E342E33},srcline={40}}{54686520616476656E74206F662073696E676C652063656C6C20444746} -\BKM@entry{id=57,dest={73656374696F6E2E342E34},srcline={48}}{4120717569636B206F76657276696577206F66207363415441432D736571206461746120616E616C79736973} -\BKM@entry{id=58,dest={73656374696F6E2E342E35},srcline={53}}{4F70656E207175657374696F6E73} -\BKM@entry{id=59,dest={73656374696F6E2E342E36},srcline={67}}{44617461} -\BKM@entry{id=60,dest={73656374696F6E2E342E37},srcline={78}}{4964656E74696669636174696F6E206F6620636174616C6F67206F66206368726F6D6174696E2061726368697465637475726573} -\BKM@entry{id=61,dest={73756273656374696F6E2E342E372E31},srcline={83}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642072656164207061747465726E73} -\BKM@entry{id=62,dest={73756273656374696F6E2E342E372E32},srcline={103}}{454D53657175656E6365203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E636573} -\BKM@entry{id=63,dest={73756273656374696F6E2E342E372E33},srcline={193}}{454D4A6F696E74203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E63657320616E64206368726F6D6174696E2061726368697465637475726573} -\BKM@entry{id=64,dest={73756273656374696F6E2E342E372E34},srcline={228}}{44617461207265616C69676E6D656E74} -\BKM@entry{id=65,dest={73656374696F6E2E342E38},srcline={242}}{526573756C7473} 
-\BKM@entry{id=66,dest={73756273656374696F6E2E342E382E31},srcline={246}}{467261676D656E742073697A6520616E616C79736973} -\BKM@entry{id=67,dest={73756273656374696F6E2E342E382E32},srcline={275}}{4D6561737572696E67206F70656E206368726F6D6174696E20616E64206E75636C656F736F6D65206F63637570616E6379} -\BKM@entry{id=68,dest={73756273656374696F6E2E342E382E33},srcline={305}}{4576616C756174696F6E206F6620454D53657175656E636520616E642043684950506172746974696F6E696E67} -\BKM@entry{id=69,dest={73656374696F6E2E342E39},srcline={375}}{416C69676E696E67207468652062696E64696E67207369746573} -\BKM@entry{id=70,dest={73656374696F6E2E342E3130},srcline={400}}{4578706C6F72696E6720696E646976696475616C20544620636C6173736573} -\BKM@entry{id=71,dest={73656374696F6E2E342E3131},srcline={416}}{44697363757373696F6E73} -\BKM@entry{id=72,dest={73656374696F6E2E342E3132},srcline={426}}{506572737065637469766573} -\BKM@entry{id=73,dest={73656374696F6E2E342E3133},srcline={436}}{4D6574686F6473} -\BKM@entry{id=74,dest={73756273656374696F6E2E342E31332E31},srcline={438}}{496D706C656D656E746174696F6E73} -\BKM@entry{id=75,dest={73756273656374696F6E2E342E31332E32},srcline={453}}{467261676D656E7420636C6173736573} -\BKM@entry{id=76,dest={73756273656374696F6E2E342E31332E33},srcline={467}}{53696D756C617465642073657175656E636573} -\BKM@entry{id=77,dest={73756273656374696F6E2E342E31332E34},srcline={471}}{5265616C69676E6D656E74207573696E67204A4153504152206D6F74696673} -\BKM@entry{id=78,dest={73756273656374696F6E2E342E31332E35},srcline={474}}{446973706C6179206F66206D6F746966206C6F676F} -\BKM@entry{id=79,dest={73756273656374696F6E2E342E31332E36},srcline={477}}{4D6F64656C20657874656E73696F6E} -\BKM@entry{id=80,dest={73756273656374696F6E2E342E31332E37},srcline={480}}{45787472616374696E6720646174612061737369676E656420746F206120636C617373} -\@input{tail/appendix.aux} -\BKM@entry{id=81,dest={617070656E6469782E41},srcline={5}}{416E20617070656E646978} 
-\BKM@entry{id=82,dest={73656374696F6E2E412E31},srcline={7}}{537570706C656D656E746172792066696775726573} +\BKM@entry{id=55,dest={73656374696F6E2E342E32},srcline={33}}{4D6F6E69746F72696E672054462062696E64696E67} +\BKM@entry{id=56,dest={73656374696F6E2E342E33},srcline={42}}{54686520616476656E74206F662073696E676C652063656C6C20444746} +\BKM@entry{id=57,dest={73656374696F6E2E342E34},srcline={69}}{4F70656E20697373756573} +\BKM@entry{id=58,dest={73656374696F6E2E342E35},srcline={73}}{44617461} +\BKM@entry{id=59,dest={73656374696F6E2E342E36},srcline={84}}{4964656E74696679696E67206F7665722D726570726573656E746564207369676E616C73} +\BKM@entry{id=60,dest={73756273656374696F6E2E342E362E31},srcline={89}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642072656164207061747465726E73} +\BKM@entry{id=61,dest={73756273656374696F6E2E342E362E32},srcline={101}}{454D53657175656E6365203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E636573} +\BKM@entry{id=62,dest={73756273656374696F6E2E342E362E33},srcline={200}}{454D4A6F696E74203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E63657320616E64206368726F6D6174696E2061726368697465637475726573} +\BKM@entry{id=63,dest={73756273656374696F6E2E342E362E34},srcline={235}}{44617461207265616C69676E6D656E74} +\BKM@entry{id=64,dest={73656374696F6E2E342E37},srcline={249}}{526573756C7473} +\BKM@entry{id=65,dest={73756273656374696F6E2E342E372E31},srcline={253}}{467261676D656E742073697A6520616E616C79736973} +\BKM@entry{id=66,dest={73756273656374696F6E2E342E372E32},srcline={282}}{4D6561737572696E67206F70656E206368726F6D6174696E20616E64206E75636C656F736F6D65206F63637570616E6379} +\BKM@entry{id=67,dest={73756273656374696F6E2E342E372E33},srcline={312}}{4576616C756174696F6E206F6620454D53657175656E636520616E642043684950506172746974696F6E696E67} 
+\BKM@entry{id=68,dest={73656374696F6E2E342E38},srcline={382}}{416C69676E696E67207468652062696E64696E67207369746573} +\BKM@entry{id=69,dest={73656374696F6E2E342E39},srcline={407}}{4578706C6F72696E6720696E646976696475616C20544620636C6173736573} +\BKM@entry{id=70,dest={73656374696F6E2E342E3130},srcline={423}}{44697363757373696F6E73} +\BKM@entry{id=71,dest={73656374696F6E2E342E3131},srcline={433}}{506572737065637469766573} +\BKM@entry{id=72,dest={73656374696F6E2E342E3132},srcline={443}}{4D6574686F6473} +\BKM@entry{id=73,dest={73756273656374696F6E2E342E31322E31},srcline={445}}{496D706C656D656E746174696F6E73} +\BKM@entry{id=74,dest={73756273656374696F6E2E342E31322E32},srcline={460}}{467261676D656E7420636C6173736573} +\BKM@entry{id=75,dest={73756273656374696F6E2E342E31322E33},srcline={474}}{53696D756C617465642073657175656E636573} +\BKM@entry{id=76,dest={73756273656374696F6E2E342E31322E34},srcline={478}}{5265616C69676E6D656E74207573696E67204A4153504152206D6F74696673} +\BKM@entry{id=77,dest={73756273656374696F6E2E342E31322E35},srcline={523}}{4D6F64656C20657874656E73696F6E} +\BKM@entry{id=78,dest={73756273656374696F6E2E342E31322E36},srcline={526}}{45787472616374696E6720646174612061737369676E656420746F206120636C617373} \@writefile{toc}{\vspace {\normalbaselineskip }} +\@input{tail/appendix.aux} +\BKM@entry{id=79,dest={617070656E6469782E41},srcline={5}}{416E20617070656E646978} +\BKM@entry{id=80,dest={73656374696F6E2E412E31},srcline={7}}{537570706C656D656E746172792066696775726573} \pgfsyspdfmark {pgfid24}{0}{40463552} \pgfsyspdfmark {pgfid23}{6}{40511883} \@input{tail/biblio.aux} -\BKM@entry{id=83,dest={73656374696F6E2A2E3632},srcline={3}}{4269626C696F677261706879} +\BKM@entry{id=81,dest={73656374696F6E2A2E3632},srcline={3}}{4269626C696F677261706879} \pgfsyspdfmark {pgfid26}{0}{40463552} \pgfsyspdfmark {pgfid25}{6}{40498788} -\BKM@entry{id=84,dest={617070656E6469782A2E3633},srcline={6}}{4269626C696F677261706879} 
+\BKM@entry{id=82,dest={617070656E6469782A2E3633},srcline={6}}{4269626C696F677261706879} \@input{tail/cv.aux} -\BKM@entry{id=85,dest={73656374696F6E2A2E3634},srcline={4}}{437572726963756C756D205669746165} +\BKM@entry{id=83,dest={73656374696F6E2A2E3634},srcline={4}}{437572726963756C756D205669746165} diff --git a/my_thesis.bbl b/my_thesis.bbl index db40db0..e1b4142 100644 --- a/my_thesis.bbl +++ b/my_thesis.bbl @@ -1,685 +1,680 @@ \begin{thebibliography}{} \bibitem[Adey et~al., 2010]{adey_rapid_2010} Adey, A., Morrison, H.~G., {Asan}, Xun, X., Kitzman, J.~O., Turner, E.~H., Stackhouse, B., MacKenzie, A.~P., Caruccio, N.~C., Zhang, X., and Shendure, J. (2010). \newblock Rapid, low-input, low-bias construction of shotgun fragment libraries by high-density in vitro transposition. \newblock {\em Genome Biology}, 11(12):R119. \bibitem[Aerts et~al., 2003]{aerts_toucan:_2003} Aerts, S., Thijs, G., Coessens, B., Staes, M., Moreau, Y., and Moor, B.~D. (2003). \newblock Toucan: deciphering the cis ‐regulatory logic of coregulated genes. \newblock {\em Nucleic Acids Research}, 31(6):1753--1764. \bibitem[Aibar et~al., 2017]{aibar_scenic:_2017} Aibar, S., González-Blas, C.~B., Moerman, T., Huynh-Thu, V.~A., Imrichova, H., Hulselmans, G., Rambow, F., Marine, J.-C., Geurts, P., Aerts, J., van~den Oord, J., Atak, Z.~K., Wouters, J., and Aerts, S. (2017). \newblock {SCENIC}: single-cell regulatory network inference and clustering. \newblock {\em Nature Methods}, 14(11):1083--1086. \bibitem[Alipanahi et~al., 2015]{alipanahi_predicting_2015} Alipanahi, B., Delong, A., Weirauch, M.~T., and Frey, B.~J. (2015). \newblock Predicting the sequence specificities of {DNA}- and {RNA}-binding proteins by deep learning. \newblock {\em Nature Biotechnology}, 33(8):831--838. \bibitem[Ambrosini et~al., 2016a]{ambrosini_chip-seq_2016} Ambrosini, G., Dreos, R., Kumar, S., and Bucher, P. (2016a). 
\newblock The {ChIP}-{Seq} tools and web server: a resource for analyzing {ChIP}-seq and other types of genomic data. \newblock {\em BMC Genomics}, 17:938. \bibitem[Ambrosini et~al., 2016b]{ambrosini_chip-seq_2016-1} Ambrosini, G., Dreos, R., Kumar, S., and Bucher, P. (2016b). \newblock The {ChIP}-{Seq} tools and web server: a resource for analyzing {ChIP}-seq and other types of genomic data. \newblock {\em BMC Genomics}, 17(1):938. \bibitem[Ambrosini et~al., 2018]{ambrosini_pwmscan:_2018} Ambrosini, G., Groux, R., and Bucher, P. (2018). \newblock {PWMScan}: a fast tool for scanning entire genomes with a position-specific weight matrix. \newblock {\em Bioinformatics}, 34(14):2483--2484. \bibitem[Ambrosini et~al., 2003]{ambrosini_signal_2003} Ambrosini, G., Praz, V., Jagannathan, V., and Bucher, P. (2003). \newblock Signal search analysis server. \newblock {\em Nucleic Acids Research}, 31(13):3618--3620. \bibitem[Angerer et~al., 2017]{angerer_single_2017} Angerer, P., Simon, L., Tritschler, S., Wolf, F.~A., Fischer, D., and Theis, F.~J. (2017). \newblock Single cells make big data: {New} challenges and opportunities in transcriptomics. \newblock {\em Current Opinion in Systems Biology}, 4:85--91. \bibitem[Bailey et~al., 2015]{bailey_znf143_2015} Bailey, S.~D., Zhang, X., Desai, K., Aid, M., Corradin, O., Cowper-Sal·lari, R., Akhtar-Zaidi, B., Scacheri, P.~C., Haibe-Kains, B., and Lupien, M. (2015). \newblock {ZNF}143 provides sequence specificity to secure chromatin interactions at gene promoters. \newblock {\em Nature Communications}, 2:6186. \bibitem[Bailey et~al., 2009]{bailey_meme_2009} Bailey, T.~L., Boden, M., Buske, F.~A., Frith, M., Grant, C.~E., Clementi, L., Ren, J., Li, W.~W., and Noble, W.~S. (2009). \newblock {MEME} {Suite}: tools for motif discovery and searching. \newblock {\em Nucleic Acids Research}, 37(suppl\_2):W202--W208. 
\bibitem[Barrett et~al., 2011]{barrett_ncbi_2011} Barrett, T., Troup, D.~B., Wilhite, S.~E., Ledoux, P., Evangelista, C., Kim, I.~F., Tomashevsky, M., Marshall, K.~A., Phillippy, K.~H., Sherman, P.~M., Muertter, R.~N., Holko, M., Ayanbule, O., Yefanov, A., and Soboleva, A. (2011). \newblock {NCBI} {GEO}: archive for functional genomics data sets—10 years on. \newblock {\em Nucleic Acids Research}, 39(suppl\_1):D1005--D1010. \bibitem[Barski et~al., 2007]{barski_high-resolution_2007} Barski, A., Cuddapah, S., Cui, K., Roh, T.-Y., Schones, D.~E., Wang, Z., Wei, G., Chepelev, I., and Zhao, K. (2007). \newblock High-{Resolution} {Profiling} of {Histone} {Methylations} in the {Human} {Genome}. \newblock {\em Cell}, 129(4):823--837. \bibitem[Beckstette et~al., 2006]{beckstette_fast_2006} Beckstette, M., Homann, R., Giegerich, R., and Kurtz, S. (2006). \newblock Fast index based algorithms and software for matching position specific scoring matrices. \newblock {\em BMC Bioinformatics}, 7:389. \bibitem[Berest et~al., 2018]{berest_quantification_2018} Berest, I., Arnold, C., Reyes-Palomares, A., Palla, G., Rasmussen, K.~D., Helin, K., and Zaugg, J. (2018). \newblock Quantification of differential transcription factor activity and multiomics-based classification into activators and repressors: {diffTF}. \newblock {\em bioRxiv}. \bibitem[Berger and Bulyk, 2009]{berger_universal_2009} Berger, M.~F. and Bulyk, M.~L. (2009). \newblock Universal protein-binding microarrays for the comprehensive characterization of the {DNA}-binding specificities of transcription factors. \newblock {\em Nature Protocols}, 4(3):393--411. \bibitem[Boller et~al., 2018]{boller_defining_2018} Boller, S., Li, R., and Grosschedl, R. (2018). \newblock Defining {B} {Cell} {Chromatin}: {Lessons} from {EBF}1. \newblock {\em Trends in Genetics}, 34(4):257--269. 
\bibitem[Boller et~al., 2016]{boller_pioneering_2016} Boller, S., Ramamoorthy, S., Akbas, D., Nechanitzky, R., Burger, L., Murr, R., Schübeler, D., and Grosschedl, R. (2016). \newblock Pioneering {Activity} of the {C}-{Terminal} {Domain} of {EBF}1 {Shapes} the {Chromatin} {Landscape} for {B} {Cell} {Programming}. \newblock {\em Immunity}, 44(3):527--541. \bibitem[Boyle et~al., 2008]{boyle_high-resolution_2008} Boyle, A.~P., Davis, S., Shulha, H.~P., Meltzer, P., Margulies, E.~H., Weng, Z., Furey, T.~S., and Crawford, G.~E. (2008). \newblock High-{Resolution} {Mapping} and {Characterization} of {Open} {Chromatin} across the {Genome}. \newblock {\em Cell}, 132(2):311--322. \bibitem[Bucher and Trifonov, 1986]{bucher_compilation_1986} Bucher, P. and Trifonov, E.~N. (1986). \newblock Compilation and analysis of eukaryotic {POL} {II} promoter sequences. \newblock {\em Nucleic Acids Research}, 14(24):10009--10026. \bibitem[Buenrostro et~al., 2013]{buenrostro_transposition_2013} Buenrostro, J.~D., Giresi, P.~G., Zaba, L.~C., Chang, H.~Y., and Greenleaf, W.~J. (2013). \newblock Transposition of native chromatin for fast and sensitive epigenomic profiling of open chromatin, {DNA}-binding proteins and nucleosome position. \newblock {\em Nature Methods}, 10(12):1213--1218. \bibitem[Castro-Mondragon et~al., 2017]{castro-mondragon_rsat_2017} Castro-Mondragon, J.~A., Jaeger, S., Thieffry, D., Thomas-Chollier, M., and van Helden, J. (2017). \newblock {RSAT} matrix-clustering: dynamic exploration and redundancy reduction of transcription factor binding motif collections. \newblock {\em Nucleic Acids Research}, 45(13):e119--e119. \bibitem[Chatr-aryamontri et~al., 2017]{chatr-aryamontri_biogrid_2017} Chatr-aryamontri, A., Oughtred, R., Boucher, L., Rust, J., Chang, C., Kolas, N.~K., O'Donnell, L., Oster, S., Theesfeld, C., Sellam, A., Stark, C., Breitkreutz, B.-J., Dolinski, K., and Tyers, M. (2017). \newblock The {BioGRID} interaction database: 2017 update. 
\newblock {\em Nucleic Acids Research}, 45(D1):D369--D379. \bibitem[Cheng et~al., 2012]{cheng_understanding_2012} Cheng, C., Alexander, R., Min, R., Leng, J., Yip, K.~Y., Rozowsky, J., Yan, K.-K., Dong, X., Djebali, S., Ruan, Y., Davis, C.~A., Carninci, P., Lassman, T., Gingeras, T.~R., Guigó, R., Birney, E., Weng, Z., Snyder, M., and Gerstein, M. (2012). \newblock Understanding transcriptional regulation by integrative analysis of transcription factor binding data. \newblock {\em Genome Research}, 22(9):1658--1667. \bibitem[Cirillo et~al., 2002]{cirillo_opening_2002} Cirillo, L.~A., Lin, F.~R., Cuesta, I., Friedman, D., Jarnik, M., and Zaret, K.~S. (2002). \newblock Opening of {Compacted} {Chromatin} by {Early} {Developmental} {Transcription} {Factors} {HNF}3 ({FoxA}) and {GATA}-4. \newblock {\em Molecular Cell}, 9(2):279--289. \bibitem[Consortium, 2012]{consortium_integrated_2012} Consortium, T. E.~P. (2012). \newblock An integrated encyclopedia of {DNA} elements in the human genome. \newblock {\em Nature}, 489(7414):57--74. \bibitem[Dalton et~al., 2009]{dalton_clustering_2009} Dalton, L., Ballarin, V., and Brun, M. (2009). \newblock Clustering {Algorithms}: {On} {Learning}, {Validation}, {Performance}, and {Applications} to {Genomics}. \newblock {\em Current Genomics}, 10(6):430--445. \bibitem[Donohoe et~al., 2007]{donohoe_identification_2007} Donohoe, M.~E., Zhang, L.-F., Xu, N., Shi, Y., and Lee, J.~T. (2007). \newblock Identification of a {Ctcf} {Cofactor}, {Yy}1, for the {X} {Chromosome} {Binary} {Switch}. \newblock {\em Molecular Cell}, 25(1):43--56. \bibitem[Dreos et~al., 2013]{dreos_epd_2013} Dreos, R., Ambrosini, G., Cavin~Périer, R., and Bucher, P. (2013). \newblock {EPD} and {EPDnew}, high-quality promoter resources in the next-generation sequencing era. \newblock {\em Nucleic Acids Research}, 41(D1):D157--D164. \bibitem[Dreos et~al., 2017]{dreos_eukaryotic_2017} Dreos, R., Ambrosini, G., Groux, R., Cavin Périer, R., and Bucher, P. (2017). 
\newblock The eukaryotic promoter database in its 30th year: focus on non-vertebrate organisms. \newblock {\em Nucleic Acids Research}, 45(D1):D51--D55. \bibitem[Dreos et~al., 2018]{dreos_mga_2018} Dreos, R., Ambrosini, G., Groux, R., Périer, R.~C., and Bucher, P. (2018). \newblock {MGA} repository: a curated data resource for {ChIP}-seq and other genome annotated data. \newblock {\em Nucleic Acids Research}, 46(D1):D175--D180. \bibitem[Dreos et~al., 2015]{dreos_eukaryotic_2015} Dreos, R., Ambrosini, G., Périer, R.~C., and Bucher, P. (2015). \newblock The {Eukaryotic} {Promoter} {Database}: expansion of {EPDnew} and new promoter analysis tools. \newblock {\em Nucleic Acids Research}, 43(D1):D92--D96. \bibitem[Fan et~al., 2016]{fan_characterizing_2016} Fan, J., Salathia, N., Liu, R., Kaeser, G.~E., Yung, Y.~C., Herman, J.~L., Kaper, F., Fan, J.-B., Zhang, K., Chun, J., and Kharchenko, P.~V. (2016). \newblock Characterizing transcriptional heterogeneity through pathway and gene set overdispersion analysis. \newblock {\em Nature Methods}, 13(3):241--244. \bibitem[Fu et~al., 2004]{fu_motifviz:_2004} Fu, Y., Frith, M.~C., Haverty, P.~M., and Weng, Z. (2004). \newblock {MotifViz}: an analysis and visualization tool for motif discovery. \newblock {\em Nucleic Acids Research}, 32(suppl\_2):W420--W423. \bibitem[Fu et~al., 2008]{fu_insulator_2008} Fu, Y., Sinha, M., Peterson, C.~L., and Weng, Z. (2008). \newblock The {Insulator} {Binding} {Protein} {CTCF} {Positions} 20 {Nucleosomes} around {Its} {Binding} {Sites} across the {Human} {Genome}. \newblock {\em PLOS Genetics}, 4(7):e1000138. \bibitem[Gaffney et~al., 2012]{gaffney_controls_2012} Gaffney, D.~J., McVicker, G., Pai, A.~A., Fondufe-Mittendorf, Y.~N., Lewellen, N., Michelini, K., Widom, J., Gilad, Y., and Pritchard, J.~K. (2012). \newblock Controls of {Nucleosome} {Positioning} in the {Human} {Genome}. \newblock {\em PLoS Genet}, 8(11):e1003036. 
\bibitem[Gerstein et~al., 2012]{gerstein_architecture_2012} Gerstein, M.~B., Kundaje, A., Hariharan, M., Landt, S.~G., Yan, K.-K., Cheng, C., Mu, X.~J., Khurana, E., Rozowsky, J., Alexander, R., Min, R., Alves, P., Abyzov, A., Addleman, N., Bhardwaj, N., Boyle, A.~P., Cayting, P., Charos, A., Chen, D.~Z., Cheng, Y., Clarke, D., Eastman, C., Euskirchen, G., Frietze, S., Fu, Y., Gertz, J., Grubert, F., Harmanci, A., Jain, P., Kasowski, M., Lacroute, P., Leng, J., Lian, J., Monahan, H., O’Geen, H., Ouyang, Z., Partridge, E.~C., Patacsil, D., Pauli, F., Raha, D., Ramirez, L., Reddy, T.~E., Reed, B., Shi, M., Slifer, T., Wang, J., Wu, L., Yang, X., Yip, K.~Y., Zilberman-Schapira, G., Batzoglou, S., Sidow, A., Farnham, P.~J., Myers, R.~M., Weissman, S.~M., and Snyder, M. (2012). \newblock Architecture of the human regulatory network derived from {ENCODE} data. \newblock {\em Nature}, 489(7414):91--100. \bibitem[Ghirlando and Felsenfeld, 2016]{ghirlando_ctcf:_2016} Ghirlando, R. and Felsenfeld, G. (2016). \newblock {CTCF}: making the right connections. \newblock {\em Genes \& Development}, 30(8):881--891. \bibitem[González-Blas et~al., 2019]{gonzalez-blas_cistopic:_2019} González-Blas, C.~B., Minnoye, L., Papasokrati, D., Aibar, S., Hulselmans, G., Christiaens, V., Davie, K., Wouters, J., and Aerts, S. (2019). \newblock {cisTopic}: cis-regulatory topic modeling on single-cell {ATAC}-seq data. \newblock {\em Nature Methods}, 16(5):397. \bibitem[Grant et~al., 2011]{grant_fimo:_2011} Grant, C.~E., Bailey, T.~L., and Noble, W.~S. (2011). \newblock {FIMO}: scanning for occurrences of a given motif. \newblock {\em Bioinformatics}, 27(7):1017--1018. \bibitem[Grossman et~al., 2018]{grossman_positional_2018} Grossman, S.~R., Engreitz, J., Ray, J.~P., Nguyen, T.~H., Hacohen, N., and Lander, E.~S. (2018). \newblock Positional specificity of different transcription factor classes within enhancers. \newblock {\em Proceedings of the National Academy of Sciences}, 115(30):E7222--E7230. 
\bibitem[Groux and Bucher, 2019]{groux_spar-k:_2019} Groux, R. and Bucher, P. (2019). \newblock {SPar}-{K}: a method to partition {NGS} signal data. \newblock {\em Bioinformatics}. \bibitem[Guo et~al., 2012]{guo_high_2012} Guo, Y., Mahony, S., and Gifford, D.~K. (2012). \newblock High {Resolution} {Genome} {Wide} {Binding} {Event} {Finding} and {Motif} {Discovery} {Reveals} {Transcription} {Factor} {Spatial} {Binding} {Constraints}. \newblock {\em PLOS Computational Biology}, 8(8):e1002638. \bibitem[Hagman and Lukin, 2005]{hagman_early_2005} Hagman, J. and Lukin, K. (2005). \newblock Early {B}-cell factor ‘pioneers’ the way for {B}-cell development. \newblock {\em Trends in Immunology}, 26(9):455--461. \bibitem[Heinz et~al., 2010]{heinz_simple_2010} Heinz, S., Benner, C., Spann, N., Bertolino, E., Lin, Y.~C., Laslo, P., Cheng, J.~X., Murre, C., Singh, H., and Glass, C.~K. (2010). \newblock Simple {Combinations} of {Lineage}-{Determining} {Transcription} {Factors} {Prime} cis-{Regulatory} {Elements} {Required} for {Macrophage} and {B} {Cell} {Identities}. \newblock {\em Molecular Cell}, 38(4):576--589. -\bibitem[Hepler, 2018]{hepler_10x_2018} -Hepler, L. (2018). -\newblock 10x {Genomics} takes gene imaging and analysis tools to the big - leagues. - \bibitem[Hertz et~al., 1990]{hertz_identification_1990} Hertz, G.~Z., Hartzell, G.~W., and Stormo, G.~D. (1990). \newblock Identification of consensus patterns in unaligned {DNA} sequences known to be functionally related. \newblock {\em Computer applications in the biosciences: CABIOS}, 6(2):81--92. \bibitem[Hon et~al., 2008]{hon_chromasig:_2008} Hon, G., Ren, B., and Wang, W. (2008). \newblock {ChromaSig}: {A} {Probabilistic} {Approach} to {Finding} {Common} {Chromatin} {Signatures} in the {Human} {Genome}. \newblock {\em PLOS Computational Biology}, 4(10):e1000201. \bibitem[Ioshikhes et~al., 2011]{ioshikhes_variety_2011} Ioshikhes, I., Hosid, S., and Pugh, B.~F. (2011). 
\newblock Variety of genomic {DNA} patterns for nucleosome positioning. \newblock {\em Genome Research}, 21(11):1863--1871. \bibitem[Isakova et~al., 2017]{isakova_smile-seq_2017} Isakova, A., Groux, R., Imbeault, M., Rainer, P., Alpern, D., Dainese, R., Ambrosini, G., Trono, D., Bucher, P., and Deplancke, B. (2017). \newblock {SMiLE}-seq identifies binding motifs of single and dimeric transcription factors. \newblock {\em Nature Methods}, advance online publication. \bibitem[Jolma et~al., 2010]{jolma_multiplexed_2010} Jolma, A., Kivioja, T., Toivonen, J., Cheng, L., Wei, G., Enge, M., Taipale, M., Vaquerizas, J.~M., Yan, J., Sillanpää, M.~J., Bonke, M., Palin, K., Talukder, S., Hughes, T.~R., Luscombe, N.~M., Ukkonen, E., and Taipale, J. (2010). \newblock Multiplexed massively parallel {SELEX} for characterization of human transcription factor binding specificities. \newblock {\em Genome Research}, 20(6):861--873. \bibitem[Jolma et~al., 2013]{jolma_dna-binding_2013} Jolma, A., Yan, J., Whitington, T., Toivonen, J., Nitta, K., Rastas, P., Morgunova, E., Enge, M., Taipale, M., Wei, G., Palin, K., Vaquerizas, J., Vincentelli, R., Luscombe, N., Hughes, T., Lemaire, P., Ukkonen, E., Kivioja, T., and Taipale, J. (2013). \newblock {DNA}-{Binding} {Specificities} of {Human} {Transcription} {Factors}. \newblock {\em Cell}, 152(1–2):327--339. \bibitem[Kent, 2002]{kent_blatblast-like_2002} Kent, W.~J. (2002). \newblock {BLAT}—{The} {BLAST}-{Like} {Alignment} {Tool}. \newblock {\em Genome Research}, 12(4):656--664. \bibitem[Khan et~al., 2018]{khan_jaspar_2018} Khan, A., Fornes, O., Stigliani, A., Gheorghe, M., Castro-Mondragon, J.~A., van der Lee, R., Bessy, A., Chèneby, J., Kulkarni, S.~R., Tan, G., Baranasic, D., Arenillas, D.~J., Sandelin, A., Vandepoele, K., Lenhard, B., Ballester, B., Wasserman, W.~W., Parcy, F., and Mathelier, A. (2018). \newblock {JASPAR} 2018: update of the open-access database of transcription factor binding profiles and its web framework. 
\newblock {\em Nucleic Acids Research}, 46(D1):D260--D266. \bibitem[Kiselev et~al., 2017]{kiselev_sc3:_2017} Kiselev, V.~Y., Kirschner, K., Schaub, M.~T., Andrews, T., Yiu, A., Chandra, T., Natarajan, K.~N., Reik, W., Barahona, M., Green, A.~R., and Hemberg, M. (2017). \newblock {SC}3: consensus clustering of single-cell {RNA}-seq data. \newblock {\em Nature Methods}, 14(5):483--486. \bibitem[Kulakovskiy et~al., 2018]{kulakovskiy_hocomoco:_2018} Kulakovskiy, I.~V., Vorontsov, I.~E., Yevshin, I.~S., Sharipov, R.~N., Fedorova, A.~D., Rumynskiy, E.~I., Medvedeva, Y.~A., Magana-Mora, A., Bajic, V.~B., Papatsenko, D.~A., Kolpakov, F.~A., and Makeev, V.~J. (2018). \newblock {HOCOMOCO}: towards a complete collection of transcription factor binding models for human and mouse via large-scale {ChIP}-{Seq} analysis. \newblock {\em Nucleic Acids Research}, 46(D1):D252--D259. \bibitem[Kulakovskiy et~al., 2016]{kulakovskiy_hocomoco:_2016} Kulakovskiy, I.~V., Vorontsov, I.~E., Yevshin, I.~S., Soboleva, A.~V., Kasianov, A.~S., Ashoor, H., Ba-alawi, W., Bajic, V.~B., Medvedeva, Y.~A., Kolpakov, F.~A., and Makeev, V.~J. (2016). \newblock {HOCOMOCO}: expansion and enhancement of the collection of transcription factor binding sites models. \newblock {\em Nucleic Acids Research}, 44(D1):D116--D125. \bibitem[Kundaje et~al., 2012]{kundaje_ubiquitous_2012} Kundaje, A., Kyriazopoulou-Panagiotopoulou, S., Libbrecht, M., Smith, C.~L., Raha, D., Winters, E.~E., Johnson, S.~M., Snyder, M., Batzoglou, S., and Sidow, A. (2012). \newblock Ubiquitous heterogeneity and asymmetry of the chromatin environment at regulatory elements. \newblock {\em Genome Research}, 22(9):1735--1747. \bibitem[Kurotaki et~al., 2017]{kurotaki_transcriptional_2017} Kurotaki, D., Sasaki, H., and Tamura, T. (2017). \newblock Transcriptional control of monocyte and macrophage development. \newblock {\em International Immunology}, 29(3):97--107. \bibitem[Langmead and Salzberg, 2012]{langmead_fast_2012} Langmead, B. 
and Salzberg, S.~L. (2012). \newblock Fast gapped-read alignment with {Bowtie} 2. \newblock {\em Nature Methods}, 9(4):357--359. \bibitem[Langmead et~al., 2009]{langmead_ultrafast_2009} Langmead, B., Trapnell, C., Pop, M., and Salzberg, S.~L. (2009). \newblock Ultrafast and memory-efficient alignment of short {DNA} sequences to the human genome. \newblock {\em Genome Biology}, 10(3):R25. \bibitem[Li et~al., 2009]{li_sequence_2009} Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., Marth, G., Abecasis, G., and Durbin, R. (2009). \newblock The {Sequence} {Alignment}/{Map} format and {SAMtools}. \newblock {\em Bioinformatics}, 25(16):2078--2079. \bibitem[Li et~al., 2019]{li_identification_2019} Li, Z., Schulz, M.~H., Look, T., Begemann, M., Zenke, M., and Costa, I.~G. (2019). \newblock Identification of transcription factor binding sites using {ATAC}-seq. \newblock {\em Genome Biology}, 20(1):45. \bibitem[Lizio et~al., 2015]{lizio_gateways_2015} Lizio, M., Harshbarger, J., Shimoji, H., Severin, J., Kasukawa, T., Sahin, S., Abugessaisa, I., Fukuda, S., Hori, F., Ishikawa-Kato, S., Mungall, C.~J., Arner, E., Baillie, J.~K., Bertin, N., Bono, H., de~Hoon, M., Diehl, A.~D., Dimont, E., Freeman, T.~C., Fujieda, K., Hide, W., Kaliyaperumal, R., Katayama, T., Lassmann, T., Meehan, T.~F., Nishikata, K., Ono, H., Rehli, M., Sandelin, A., Schultes, E.~A., ‘t Hoen, P.~A., Tatum, Z., Thompson, M., Toyoda, T., Wright, D.~W., Daub, C.~O., Itoh, M., Carninci, P., Hayashizaki, Y., Forrest, A.~R., Kawaji, H., and {the FANTOM consortium} (2015). \newblock Gateways to the {FANTOM}5 promoter level mammalian expression atlas. \newblock {\em Genome Biology}, 16(1):22. \bibitem[Losada, 2014]{losada_cohesin_2014} Losada, A. (2014). \newblock Cohesin in cancer: chromosome segregation and beyond. \newblock {\em Nature Reviews Cancer}, 14(6):389--393. \bibitem[Maerkl and Quake, 2007]{maerkl_systems_2007} Maerkl, S.~J. and Quake, S.~R. (2007). 
\newblock A {Systems} {Approach} to {Measuring} the {Binding} {Energy} {Landscapes} of {Transcription} {Factors}. \newblock {\em Science}, 315(5809):233--237. \bibitem[Maier et~al., 2004]{maier_early_2004} Maier, H., Ostraat, R., Gao, H., Fields, S., Shinton, S.~A., Medina, K.~L., Ikawa, T., Murre, C., Singh, H., Hardy, R.~R., and Hagman, J. (2004). \newblock Early {B} cell factor cooperates with {Runx}1 and mediates epigenetic changes associated with mb-1 transcription. \newblock {\em Nature Immunology}, 5(10):1069--1077. \bibitem[Marsland, 2015]{marsland_machine_2015-1} Marsland, S. (2015). \newblock {\em Machine {Learning}, {An} algorithmic {Perspective}, {Chapter} 7 {Probabilistic} {Learning}}. \newblock CRC Press, Boca Raton, second edition edition. \bibitem[Mathelier et~al., 2014]{mathelier_jaspar_2014} Mathelier, A., Zhao, X., Zhang, A.~W., Parcy, F., Worsley-Hunt, R., Arenillas, D.~J., Buchman, S., Chen, C.-y., Chou, A., Ienasescu, H., Lim, J., Shyr, C., Tan, G., Zhou, M., Lenhard, B., Sandelin, A., and Wasserman, W.~W. (2014). \newblock {JASPAR} 2014: an extensively expanded and updated open-access database of transcription factor binding profiles. \newblock {\em Nucleic Acids Research}, 42(D1):D142--D147. \bibitem[Nair et~al., 2014]{nair_probabilistic_2014} Nair, N.~U., Kumar, S., Moret, B. M.~E., and Bucher, P. (2014). \newblock Probabilistic partitioning methods to find significant patterns in {ChIP}-{Seq} data. \newblock {\em Bioinformatics}, 30(17):2406--2413. 
\bibitem[Neph et~al., 2012]{neph_expansive_2012} Neph, S., Vierstra, J., Stergachis, A.~B., Reynolds, A.~P., Haugen, E., Vernot, B., Thurman, R.~E., John, S., Sandstrom, R., Johnson, A.~K., Maurano, M.~T., Humbert, R., Rynes, E., Wang, H., Vong, S., Lee, K., Bates, D., Diegel, M., Roach, V., Dunn, D., Neri, J., Schafer, A., Hansen, R.~S., Kutyavin, T., Giste, E., Weaver, M., Canfield, T., Sabo, P., Zhang, M., Balasundaram, G., Byron, R., MacCoss, M.~J., Akey, J.~M., Bender, M.~A., Groudine, M., Kaul, R., and Stamatoyannopoulos, J.~A. (2012). \newblock An expansive human regulatory lexicon encoded in transcription factor footprints. \newblock {\em Nature}, 489(7414):83--90. \bibitem[Nielsen et~al., 2012]{nielsen_catchprofiles:_2012} Nielsen, F. G.~G., Markus, K.~G., Friborg, R.~M., Favrholdt, L.~M., Stunnenberg, H.~G., and Huynen, M. (2012). \newblock {CATCHprofiles}: {Clustering} and {Alignment} {Tool} for {ChIP} {Profiles}. \newblock {\em PLOS ONE}, 7(1):e28272. \bibitem[Ong and Corces, 2014]{ong_ctcf:_2014} Ong, C.-T. and Corces, V.~G. (2014). \newblock {CTCF}: an architectural protein bridging genome topology and function. \newblock {\em Nature Reviews Genetics}, 15(4):234--246. \bibitem[Orenstein and Shamir, 2014]{orenstein_comparative_2014} Orenstein, Y. and Shamir, R. (2014). \newblock A comparative analysis of transcription factor binding models learned from {PBM}, {HT}-{SELEX} and {ChIP} data. \newblock {\em Nucleic Acids Research}, 42(8):e63--e63. \bibitem[Ou et~al., 2018]{ou_motifstack_2018} Ou, J., Wolfe, S.~A., Brodsky, M.~H., and Zhu, L.~J. (2018). \newblock {motifStack} for the analysis of transcription factor binding site evolution. \newblock {\em Nature Methods}, 15(1):8--9. \bibitem[Pizzi and Ukkonen, 2008]{pizzi_fast_2008} Pizzi, C. and Ukkonen, E. (2008). \newblock Fast profile matching algorithms — {A} survey. \newblock {\em Theoretical Computer Science}, 395(2):137--157. 
\bibitem[Pollard et~al., 2010]{pollard_detection_2010} Pollard, K.~S., Hubisz, M.~J., Rosenbloom, K.~R., and Siepel, A. (2010). \newblock Detection of nonneutral substitution rates on mammalian phylogenies. \newblock {\em Genome Research}, 20(1):110--121. \bibitem[Quinlan and Hall, 2010]{quinlan_bedtools:_2010} Quinlan, A.~R. and Hall, I.~M. (2010). \newblock {BEDTools}: a flexible suite of utilities for comparing genomic features. \newblock {\em Bioinformatics}, 26(6):841--842. \bibitem[Raney et~al., 2014]{raney_track_2014} Raney, B.~J., Dreszer, T.~R., Barber, G.~P., Clawson, H., Fujita, P.~A., Wang, T., Nguyen, N., Paten, B., Zweig, A.~S., Karolchik, D., and Kent, W.~J. (2014). \newblock Track data hubs enable visualization of user-defined genome-wide annotations on the {UCSC} {Genome} {Browser}. \newblock {\em Bioinformatics}, 30(7):1003--1005. \bibitem[Rico et~al., 2017]{rico_comparative_2017} Rico, D., Martens, J.~H., Downes, K., Carrillo-de Santa-Pau, E., Pancaldi, V., Breschi, A., Richardson, D., Heath, S., Saeed, S., Frontini, M., Chen, L., Watt, S., Müller, F., Clarke, L., Kerstens, H.~H., Wilder, S.~P., Palumbo, E., Djebali, S., Raineri, E., Merkel, A., Esteve-Codina, A., Sultan, M., Bommel, A.~v., Gut, M., Yaspo, M.-L., Rubio, M., Fernandez, J.~M., Attwood, A., Torre, V. d.~l., Royo, R., Fragkogianni, S., Gelpí, J.~L., Torrents, D., Iotchkova, V., Logie, C., Aghajanirefah, A., Singh, A.~A., Janssen-Megens, E.~M., Berentsen, K., Erber, W., Rendon, A., Kostadima, M., Loos, R., Ent, M. A. v.~d., Kaan, A., Sharifi, N., Paul, D.~S., Ifrim, D.~C., Quintin, J., Love, M.~I., Pisano, D.~G., Burden, F., Foad, N., Farrow, S., Zerbino, D.~R., Dunham, I., Kuijpers, T., Lehrach, H., Lengauer, T., Bertone, P., Netea, M.~G., Vingron, M., Beck, S., Flicek, P., Gut, I., Ouwehand, W.~H., Bock, C., Soranzo, N., Guigo, R., Valencia, A., and Stunnenberg, H.~G. (2017). \newblock Comparative analysis of neutrophil and monocyte epigenomes. \newblock {\em bioRxiv}, page 237784. 
\bibitem[{Roadmap Epigenomics Consortium} et~al., 2015]{roadmap_epigenomics_consortium_integrative_2015} {Roadmap Epigenomics Consortium}, Kundaje, A., Meuleman, W., Ernst, J., Bilenky, M., Yen, A., Heravi-Moussavi, A., Kheradpour, P., Zhang, Z., Wang, J., Ziller, M.~J., Amin, V., Whitaker, J.~W., Schultz, M.~D., Ward, L.~D., Sarkar, A., Quon, G., Sandstrom, R.~S., Eaton, M.~L., Wu, Y.-C., Pfenning, A.~R., Wang, X., Claussnitzer, M., {Yaping Liu}, Coarfa, C., Alan~Harris, R., Shoresh, N., Epstein, C.~B., Gjoneska, E., Leung, D., Xie, W., David~Hawkins, R., Lister, R., Hong, C., Gascard, P., Mungall, A.~J., Moore, R., Chuah, E., Tam, A., Canfield, T.~K., Scott~Hansen, R., Kaul, R., Sabo, P.~J., Bansal, M.~S., Carles, A., Dixon, J.~R., Farh, K.-H., Feizi, S., Karlic, R., Kim, A.-R., Kulkarni, A., Li, D., Lowdon, R., Elliott, G., Mercer, T.~R., Neph, S.~J., Onuchic, V., Polak, P., Rajagopal, N., Ray, P., Sallari, R.~C., Siebenthall, K.~T., Sinnott-Armstrong, N.~A., Stevens, M., Thurman, R.~E., Wu, J., Zhang, B., Zhou, X., Beaudet, A.~E., Boyer, L.~A., Jager, P. L.~D., Farnham, P.~J., Fisher, S.~J., Haussler, D., Jones, S. J.~M., Li, W., Marra, M.~A., McManus, M.~T., Sunyaev, S., Thomson, J.~A., Tlsty, T.~D., Tsai, L.-H., Wang, W., Waterland, R.~A., Zhang, M.~Q., Chadwick, L.~H., Bernstein, B.~E., Costello, J.~F., Ecker, J.~R., Hirst, M., Meissner, A., Milosavljevic, A., Ren, B., Stamatoyannopoulos, J.~A., Wang, T., and Kellis, M. (2015). \newblock Integrative analysis of 111 reference human epigenomes. \newblock {\em Nature}, 518(7539):317--330. \bibitem[Rustici et~al., 2013]{rustici_arrayexpress_2013} Rustici, G., Kolesnikov, N., Brandizi, M., Burdett, T., Dylag, M., Emam, I., Farne, A., Hastings, E., Ison, J., Keays, M., Kurbatova, N., Malone, J., Mani, R., Mupo, A., Pedro~Pereira, R., Pilicheva, E., Rung, J., Sharma, A., Tang, Y.~A., Ternent, T., Tikhonov, A., Welter, D., Williams, E., Brazma, A., Parkinson, H., and Sarkans, U. (2013). 
\newblock {ArrayExpress} update—trends in database growth and links to data analysis tools. \newblock {\em Nucleic Acids Research}, 41(D1):D987--D990. \bibitem[Schones et~al., 2007]{schones_statistical_2007} Schones, D.~E., Smith, A.~D., and Zhang, M.~Q. (2007). \newblock Statistical significance of cis-regulatory modules. \newblock {\em BMC Bioinformatics}, 8(1):19. \bibitem[Schütz and Delorenzi, 2008]{schutz_mamot:_2008} Schütz, F. and Delorenzi, M. (2008). \newblock {MAMOT}: hidden {Markov} modeling tool. \newblock {\em Bioinformatics}, 24(11):1399--1400. \bibitem[Siepel et~al., 2005]{siepel_evolutionarily_2005} Siepel, A., Bejerano, G., Pedersen, J.~S., Hinrichs, A.~S., Hou, M., Rosenbloom, K., Clawson, H., Spieth, J., Hillier, L.~W., Richards, S., Weinstock, G.~M., Wilson, R.~K., Gibbs, R.~A., Kent, W.~J., Miller, W., and Haussler, D. (2005). \newblock Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. \newblock {\em Genome Research}, 15(8):1034--1050. \bibitem[Soufi et~al., 2015]{soufi_pioneer_2015} Soufi, A., Garcia, M.~F., Jaroszewicz, A., Osman, N., Pellegrini, M., and Zaret, K.~S. (2015). \newblock Pioneer {Transcription} {Factors} {Target} {Partial} {DNA} {Motifs} on {Nucleosomes} to {Initiate} {Reprogramming}. \newblock {\em Cell}, 161(3):555--568. \bibitem[Stedman et~al., 2008]{stedman_cohesins_2008} Stedman, W., Kang, H., Lin, S., Kissil, J.~L., Bartolomei, M.~S., and Lieberman, P.~M. (2008). \newblock Cohesins localize with {CTCF} at the {KSHV} latency control region and at cellular c-myc and {H}19 {Igf}2 insulators. \newblock {\em The EMBO Journal}, 27(4):654--666. \bibitem[Trifonov, 2011]{trifonov_cracking_2011} Trifonov, E.~N. (2011). \newblock Cracking the chromatin code: {Precise} rule of nucleosome positioning. \newblock {\em Physics of Life Reviews}, 8(1):39--50. \bibitem[Turatsinze et~al., 2008]{turatsinze_using_2008} Turatsinze, J.-V., Thomas-Chollier, M., Defrance, M., and Helden, J.~v. (2008). 
\newblock Using {RSAT} to scan genome sequences for transcription factor binding sites and cis -regulatory modules. \newblock {\em Nature Protocols}, 3(10):1578--1588. \bibitem[Vierstra and Stamatoyannopoulos, 2016]{vierstra_genomic_2016} Vierstra, J. and Stamatoyannopoulos, J.~A. (2016). \newblock Genomic footprinting. \newblock {\em Nature Methods}, 13(3):213--221. \bibitem[Voss and Hager, 2014]{voss_dynamic_2014} Voss, T.~C. and Hager, G.~L. (2014). \newblock Dynamic regulation of transcriptional states by chromatin and transcription factors. \newblock {\em Nature Reviews Genetics}, 15(2):69--81. \bibitem[Wang et~al., 2012]{wang_sequence_2012} Wang, J., Zhuang, J., Iyer, S., Lin, X., Whitfield, T.~W., Greven, M.~C., Pierce, B.~G., Dong, X., Kundaje, A., Cheng, Y., Rando, O.~J., Birney, E., Myers, R.~M., Noble, W.~S., Snyder, M., and Weng, Z. (2012). \newblock Sequence features and chromatin structure around the genomic regions bound by 119 human transcription factors. \newblock {\em Genome Research}, 22(9):1798--1812. \bibitem[Weirauch et~al., 2013]{weirauch_evaluation_2013} Weirauch, M.~T., Cote, A., Norel, R., Annala, M., Zhao, Y., Riley, T.~R., Saez-Rodriguez, J., Cokelaer, T., Vedenko, A., Talukder, S., {Dream5 Consortium}, Bussemaker, H.~J., Morris, Q.~D., Bulyk, M.~L., Stolovitzky, G., and Hughes, T.~R. (2013). \newblock Evaluation of methods for modeling transcription factor sequence specificity. \newblock {\em Nature Biotechnology}, 31(2):126--134. \bibitem[Wu et~al., 2016]{wu_biogps:_2016} Wu, C., Jin, X., Tsueng, G., Afrasiabi, C., and Su, A.~I. (2016). \newblock {BioGPS}: building your own mash-up of gene annotations and expression profiles. \newblock {\em Nucleic Acids Research}, 44(D1):D313--D316. \bibitem[Zaret and Carroll, 2011]{zaret_pioneer_2011} Zaret, K.~S. and Carroll, J.~S. (2011). \newblock Pioneer transcription factors: establishing competence for gene expression. \newblock {\em Genes \& Development}, 25(21):2227--2241. 
\bibitem[Zhang et~al., 2014]{zhang_canonical_2014} Zhang, Y., Vastenhouw, N.~L., Feng, J., Fu, K., Wang, C., Ge, Y., Pauli, A., Hummelen, P.~v., Schier, A.~F., and Liu, X.~S. (2014). \newblock Canonical nucleosome organization at promoters forms during genome activation. \newblock {\em Genome Research}, 24(2):260--266. \bibitem[Zhao et~al., 2005]{zhao_tred:_2005} Zhao, F., Xuan, Z., Liu, L., and Zhang, M.~Q. (2005). \newblock {TRED}: a {Transcriptional} {Regulatory} {Element} {Database} and a platform for in silico gene regulation studies. \newblock {\em Nucleic Acids Research}, 33(suppl\_1):D103--D107. \bibitem[Zhao et~al., 2009]{zhao_inferring_2009} Zhao, Y., Granas, D., and Stormo, G.~D. (2009). \newblock Inferring {Binding} {Energies} from {Selected} {Binding} {Sites}. \newblock {\em PLOS Comput Biol}, 5(12):e1000590. \end{thebibliography} diff --git a/my_thesis.blg b/my_thesis.blg index a7e7314..682ba77 100644 --- a/my_thesis.blg +++ b/my_thesis.blg @@ -1,58 +1,58 @@ This is BibTeX, Version 0.99d (TeX Live 2017/Debian) Capacity: max_strings=100000, hash_size=100000, hash_prime=85009 The top-level auxiliary file: my_thesis.aux A level-1 auxiliary file: head/dedication.aux A level-1 auxiliary file: head/acknowledgements.aux A level-1 auxiliary file: head/preface.aux A level-1 auxiliary file: head/abstracts.aux A level-1 auxiliary file: main/ch_introduction.aux A level-1 auxiliary file: main/ch_group_projects.aux A level-1 auxiliary file: main/ch_encode_peaks.aux A level-1 auxiliary file: main/ch_smile-seq.aux A level-1 auxiliary file: main/ch_atac-seq.aux A level-1 auxiliary file: tail/appendix.aux A level-1 auxiliary file: tail/biblio.aux The style file: apalike.bst A level-1 auxiliary file: tail/cv.aux Database file #1: tail/bibliography.bib -You've used 98 entries, +You've used 97 entries, 1935 wiz_defined-function locations, - 977 strings with 34114 characters, -and the built_in function-call counts, 60857 in all, are: -= -- 5362 -> -- 4853 -< -- 22 -+ -- 
1849 -- -- 1826 -* -- 6392 -:= -- 10920 -add.period$ -- 293 -call.type$ -- 98 -change.case$ -- 1403 -chr.to.int$ -- 97 -cite$ -- 98 -duplicate$ -- 1378 -empty$ -- 2901 -format.name$ -- 1954 -if$ -- 11077 + 973 strings with 33986 characters, +and the built_in function-call counts, 60621 in all, are: += -- 5338 +> -- 4846 +< -- 21 ++ -- 1847 +- -- 1824 +* -- 6377 +:= -- 10873 +add.period$ -- 291 +call.type$ -- 97 +change.case$ -- 1397 +chr.to.int$ -- 96 +cite$ -- 97 +duplicate$ -- 1368 +empty$ -- 2886 +format.name$ -- 1951 +if$ -- 11034 int.to.chr$ -- 2 int.to.str$ -- 0 missing$ -- 98 -newline$ -- 492 -num.names$ -- 294 -pop$ -- 1197 +newline$ -- 488 +num.names$ -- 291 +pop$ -- 1193 preamble$ -- 1 -purify$ -- 1403 +purify$ -- 1397 quote$ -- 0 -skip$ -- 1016 +skip$ -- 1007 stack$ -- 0 -substring$ -- 3497 +substring$ -- 3485 swap$ -- 110 text.length$ -- 1 text.prefix$ -- 0 top$ -- 0 -type$ -- 586 +type$ -- 580 warning$ -- 0 -while$ -- 365 +while$ -- 363 width$ -- 0 -write$ -- 1272 +write$ -- 1262 diff --git a/my_thesis.log b/my_thesis.log index b2da094..0fa69d6 100644 --- a/my_thesis.log +++ b/my_thesis.log @@ -1,2791 +1,2790 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12) 19 NOV 2019 15:22 +This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12) 19 NOV 2019 18:25 entering extended mode restricted \write18 enabled. %&-line parsing enabled. **my_thesis.tex (./my_thesis.tex LaTeX2e <2017-04-15> Babel <3.18> and hyphenation patterns for 84 language(s) loaded. 
(./head/settings_epfl_template.tex (/usr/share/texlive/texmf-dist/tex/latex/base/book.cls Document Class: book 2014/09/29 v1.4h Standard LaTeX document class (/usr/share/texlive/texmf-dist/tex/latex/base/bk11.clo File: bk11.clo 2014/09/29 v1.4h Standard LaTeX file (size option) ) \c@part=\count79 \c@chapter=\count80 \c@section=\count81 \c@subsection=\count82 \c@subsubsection=\count83 \c@paragraph=\count84 \c@subparagraph=\count85 \c@figure=\count86 \c@table=\count87 \abovecaptionskip=\skip41 \belowcaptionskip=\skip42 \bibindent=\dimen102 ) (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty Package: fontenc 2017/04/05 v2.0i Standard LaTeX package (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file LaTeX Font Info: Redeclaring font encoding T1 on input line 48. )) (/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty Package: inputenc 2015/03/17 v1.2c Input encoding file \inpenc@prehook=\toks14 \inpenc@posthook=\toks15 (/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def File: utf8.def 2017/01/28 v1.1t UTF-8 support for inputenc Now handling font encoding OML ... ... no UTF-8 mapping file for font encoding OML Now handling font encoding T1 ... ... 
processing UTF-8 mapping file for font encoding T1 (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu File: t1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A0 (decimal 160) defining Unicode char U+00A1 (decimal 161) defining Unicode char U+00A3 (decimal 163) defining Unicode char U+00AB (decimal 171) defining Unicode char U+00AD (decimal 173) defining Unicode char U+00BB (decimal 187) defining Unicode char U+00BF (decimal 191) defining Unicode char U+00C0 (decimal 192) defining Unicode char U+00C1 (decimal 193) defining Unicode char U+00C2 (decimal 194) defining Unicode char U+00C3 (decimal 195) defining Unicode char U+00C4 (decimal 196) defining Unicode char U+00C5 (decimal 197) defining Unicode char U+00C6 (decimal 198) defining Unicode char U+00C7 (decimal 199) defining Unicode char U+00C8 (decimal 200) defining Unicode char U+00C9 (decimal 201) defining Unicode char U+00CA (decimal 202) defining Unicode char U+00CB (decimal 203) defining Unicode char U+00CC (decimal 204) defining Unicode char U+00CD (decimal 205) defining Unicode char U+00CE (decimal 206) defining Unicode char U+00CF (decimal 207) defining Unicode char U+00D0 (decimal 208) defining Unicode char U+00D1 (decimal 209) defining Unicode char U+00D2 (decimal 210) defining Unicode char U+00D3 (decimal 211) defining Unicode char U+00D4 (decimal 212) defining Unicode char U+00D5 (decimal 213) defining Unicode char U+00D6 (decimal 214) defining Unicode char U+00D8 (decimal 216) defining Unicode char U+00D9 (decimal 217) defining Unicode char U+00DA (decimal 218) defining Unicode char U+00DB (decimal 219) defining Unicode char U+00DC (decimal 220) defining Unicode char U+00DD (decimal 221) defining Unicode char U+00DE (decimal 222) defining Unicode char U+00DF (decimal 223) defining Unicode char U+00E0 (decimal 224) defining Unicode char U+00E1 (decimal 225) defining Unicode char U+00E2 (decimal 226) defining Unicode char U+00E3 (decimal 227) defining Unicode char 
U+00E4 (decimal 228) defining Unicode char U+00E5 (decimal 229) defining Unicode char U+00E6 (decimal 230) defining Unicode char U+00E7 (decimal 231) defining Unicode char U+00E8 (decimal 232) defining Unicode char U+00E9 (decimal 233) defining Unicode char U+00EA (decimal 234) defining Unicode char U+00EB (decimal 235) defining Unicode char U+00EC (decimal 236) defining Unicode char U+00ED (decimal 237) defining Unicode char U+00EE (decimal 238) defining Unicode char U+00EF (decimal 239) defining Unicode char U+00F0 (decimal 240) defining Unicode char U+00F1 (decimal 241) defining Unicode char U+00F2 (decimal 242) defining Unicode char U+00F3 (decimal 243) defining Unicode char U+00F4 (decimal 244) defining Unicode char U+00F5 (decimal 245) defining Unicode char U+00F6 (decimal 246) defining Unicode char U+00F8 (decimal 248) defining Unicode char U+00F9 (decimal 249) defining Unicode char U+00FA (decimal 250) defining Unicode char U+00FB (decimal 251) defining Unicode char U+00FC (decimal 252) defining Unicode char U+00FD (decimal 253) defining Unicode char U+00FE (decimal 254) defining Unicode char U+00FF (decimal 255) defining Unicode char U+0100 (decimal 256) defining Unicode char U+0101 (decimal 257) defining Unicode char U+0102 (decimal 258) defining Unicode char U+0103 (decimal 259) defining Unicode char U+0104 (decimal 260) defining Unicode char U+0105 (decimal 261) defining Unicode char U+0106 (decimal 262) defining Unicode char U+0107 (decimal 263) defining Unicode char U+0108 (decimal 264) defining Unicode char U+0109 (decimal 265) defining Unicode char U+010A (decimal 266) defining Unicode char U+010B (decimal 267) defining Unicode char U+010C (decimal 268) defining Unicode char U+010D (decimal 269) defining Unicode char U+010E (decimal 270) defining Unicode char U+010F (decimal 271) defining Unicode char U+0110 (decimal 272) defining Unicode char U+0111 (decimal 273) defining Unicode char U+0112 (decimal 274) defining Unicode char U+0113 (decimal 275) 
defining Unicode char U+0114 (decimal 276) defining Unicode char U+0115 (decimal 277) defining Unicode char U+0116 (decimal 278) defining Unicode char U+0117 (decimal 279) defining Unicode char U+0118 (decimal 280) defining Unicode char U+0119 (decimal 281) defining Unicode char U+011A (decimal 282) defining Unicode char U+011B (decimal 283) defining Unicode char U+011C (decimal 284) defining Unicode char U+011D (decimal 285) defining Unicode char U+011E (decimal 286) defining Unicode char U+011F (decimal 287) defining Unicode char U+0120 (decimal 288) defining Unicode char U+0121 (decimal 289) defining Unicode char U+0122 (decimal 290) defining Unicode char U+0123 (decimal 291) defining Unicode char U+0124 (decimal 292) defining Unicode char U+0125 (decimal 293) defining Unicode char U+0128 (decimal 296) defining Unicode char U+0129 (decimal 297) defining Unicode char U+012A (decimal 298) defining Unicode char U+012B (decimal 299) defining Unicode char U+012C (decimal 300) defining Unicode char U+012D (decimal 301) defining Unicode char U+012E (decimal 302) defining Unicode char U+012F (decimal 303) defining Unicode char U+0130 (decimal 304) defining Unicode char U+0131 (decimal 305) defining Unicode char U+0132 (decimal 306) defining Unicode char U+0133 (decimal 307) defining Unicode char U+0134 (decimal 308) defining Unicode char U+0135 (decimal 309) defining Unicode char U+0136 (decimal 310) defining Unicode char U+0137 (decimal 311) defining Unicode char U+0139 (decimal 313) defining Unicode char U+013A (decimal 314) defining Unicode char U+013B (decimal 315) defining Unicode char U+013C (decimal 316) defining Unicode char U+013D (decimal 317) defining Unicode char U+013E (decimal 318) defining Unicode char U+0141 (decimal 321) defining Unicode char U+0142 (decimal 322) defining Unicode char U+0143 (decimal 323) defining Unicode char U+0144 (decimal 324) defining Unicode char U+0145 (decimal 325) defining Unicode char U+0146 (decimal 326) defining Unicode char 
U+0147 (decimal 327) defining Unicode char U+0148 (decimal 328) defining Unicode char U+014A (decimal 330) defining Unicode char U+014B (decimal 331) defining Unicode char U+014C (decimal 332) defining Unicode char U+014D (decimal 333) defining Unicode char U+014E (decimal 334) defining Unicode char U+014F (decimal 335) defining Unicode char U+0150 (decimal 336) defining Unicode char U+0151 (decimal 337) defining Unicode char U+0152 (decimal 338) defining Unicode char U+0153 (decimal 339) defining Unicode char U+0154 (decimal 340) defining Unicode char U+0155 (decimal 341) defining Unicode char U+0156 (decimal 342) defining Unicode char U+0157 (decimal 343) defining Unicode char U+0158 (decimal 344) defining Unicode char U+0159 (decimal 345) defining Unicode char U+015A (decimal 346) defining Unicode char U+015B (decimal 347) defining Unicode char U+015C (decimal 348) defining Unicode char U+015D (decimal 349) defining Unicode char U+015E (decimal 350) defining Unicode char U+015F (decimal 351) defining Unicode char U+0160 (decimal 352) defining Unicode char U+0161 (decimal 353) defining Unicode char U+0162 (decimal 354) defining Unicode char U+0163 (decimal 355) defining Unicode char U+0164 (decimal 356) defining Unicode char U+0165 (decimal 357) defining Unicode char U+0168 (decimal 360) defining Unicode char U+0169 (decimal 361) defining Unicode char U+016A (decimal 362) defining Unicode char U+016B (decimal 363) defining Unicode char U+016C (decimal 364) defining Unicode char U+016D (decimal 365) defining Unicode char U+016E (decimal 366) defining Unicode char U+016F (decimal 367) defining Unicode char U+0170 (decimal 368) defining Unicode char U+0171 (decimal 369) defining Unicode char U+0172 (decimal 370) defining Unicode char U+0173 (decimal 371) defining Unicode char U+0174 (decimal 372) defining Unicode char U+0175 (decimal 373) defining Unicode char U+0176 (decimal 374) defining Unicode char U+0177 (decimal 375) defining Unicode char U+0178 (decimal 376) 
defining Unicode char U+0179 (decimal 377) defining Unicode char U+017A (decimal 378) defining Unicode char U+017B (decimal 379) defining Unicode char U+017C (decimal 380) defining Unicode char U+017D (decimal 381) defining Unicode char U+017E (decimal 382) defining Unicode char U+01CD (decimal 461) defining Unicode char U+01CE (decimal 462) defining Unicode char U+01CF (decimal 463) defining Unicode char U+01D0 (decimal 464) defining Unicode char U+01D1 (decimal 465) defining Unicode char U+01D2 (decimal 466) defining Unicode char U+01D3 (decimal 467) defining Unicode char U+01D4 (decimal 468) defining Unicode char U+01E2 (decimal 482) defining Unicode char U+01E3 (decimal 483) defining Unicode char U+01E6 (decimal 486) defining Unicode char U+01E7 (decimal 487) defining Unicode char U+01E8 (decimal 488) defining Unicode char U+01E9 (decimal 489) defining Unicode char U+01EA (decimal 490) defining Unicode char U+01EB (decimal 491) defining Unicode char U+01F0 (decimal 496) defining Unicode char U+01F4 (decimal 500) defining Unicode char U+01F5 (decimal 501) defining Unicode char U+0218 (decimal 536) defining Unicode char U+0219 (decimal 537) defining Unicode char U+021A (decimal 538) defining Unicode char U+021B (decimal 539) defining Unicode char U+0232 (decimal 562) defining Unicode char U+0233 (decimal 563) defining Unicode char U+1E02 (decimal 7682) defining Unicode char U+1E03 (decimal 7683) defining Unicode char U+200C (decimal 8204) defining Unicode char U+2010 (decimal 8208) defining Unicode char U+2011 (decimal 8209) defining Unicode char U+2012 (decimal 8210) defining Unicode char U+2013 (decimal 8211) defining Unicode char U+2014 (decimal 8212) defining Unicode char U+2015 (decimal 8213) defining Unicode char U+2018 (decimal 8216) defining Unicode char U+2019 (decimal 8217) defining Unicode char U+201A (decimal 8218) defining Unicode char U+201C (decimal 8220) defining Unicode char U+201D (decimal 8221) defining Unicode char U+201E (decimal 8222) 
defining Unicode char U+2030 (decimal 8240) defining Unicode char U+2031 (decimal 8241) defining Unicode char U+2039 (decimal 8249) defining Unicode char U+203A (decimal 8250) defining Unicode char U+2423 (decimal 9251) defining Unicode char U+1E20 (decimal 7712) defining Unicode char U+1E21 (decimal 7713) ) Now handling font encoding OT1 ... ... processing UTF-8 mapping file for font encoding OT1 (/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu File: ot1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A0 (decimal 160) defining Unicode char U+00A1 (decimal 161) defining Unicode char U+00A3 (decimal 163) defining Unicode char U+00AD (decimal 173) defining Unicode char U+00B8 (decimal 184) defining Unicode char U+00BF (decimal 191) defining Unicode char U+00C5 (decimal 197) defining Unicode char U+00C6 (decimal 198) defining Unicode char U+00D8 (decimal 216) defining Unicode char U+00DF (decimal 223) defining Unicode char U+00E6 (decimal 230) defining Unicode char U+00EC (decimal 236) defining Unicode char U+00ED (decimal 237) defining Unicode char U+00EE (decimal 238) defining Unicode char U+00EF (decimal 239) defining Unicode char U+00F8 (decimal 248) defining Unicode char U+0131 (decimal 305) defining Unicode char U+0141 (decimal 321) defining Unicode char U+0142 (decimal 322) defining Unicode char U+0152 (decimal 338) defining Unicode char U+0153 (decimal 339) defining Unicode char U+0174 (decimal 372) defining Unicode char U+0175 (decimal 373) defining Unicode char U+0176 (decimal 374) defining Unicode char U+0177 (decimal 375) defining Unicode char U+0218 (decimal 536) defining Unicode char U+0219 (decimal 537) defining Unicode char U+021A (decimal 538) defining Unicode char U+021B (decimal 539) defining Unicode char U+2013 (decimal 8211) defining Unicode char U+2014 (decimal 8212) defining Unicode char U+2018 (decimal 8216) defining Unicode char U+2019 (decimal 8217) defining Unicode char U+201C (decimal 8220) defining 
Unicode char U+201D (decimal 8221) ) Now handling font encoding OMS ... ... processing UTF-8 mapping file for font encoding OMS (/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu File: omsenc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A7 (decimal 167) defining Unicode char U+00B6 (decimal 182) defining Unicode char U+00B7 (decimal 183) defining Unicode char U+2020 (decimal 8224) defining Unicode char U+2021 (decimal 8225) defining Unicode char U+2022 (decimal 8226) ) Now handling font encoding OMX ... ... no UTF-8 mapping file for font encoding OMX Now handling font encoding U ... ... no UTF-8 mapping file for font encoding U defining Unicode char U+00A9 (decimal 169) defining Unicode char U+00AA (decimal 170) defining Unicode char U+00AE (decimal 174) defining Unicode char U+00BA (decimal 186) defining Unicode char U+02C6 (decimal 710) defining Unicode char U+02DC (decimal 732) defining Unicode char U+200C (decimal 8204) defining Unicode char U+2026 (decimal 8230) defining Unicode char U+2122 (decimal 8482) defining Unicode char U+2423 (decimal 9251) )) (/usr/share/texlive/texmf-dist/tex/latex/natbib/natbib.sty Package: natbib 2010/09/13 8.31b (PWD, AO) \bibhang=\skip43 \bibsep=\skip44 LaTeX Info: Redefining \cite on input line 694. 
\c@NAT@ctr=\count88 ) (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.sty Package: babel 2018/02/14 3.18 The Babel package (/usr/share/texlive/texmf-dist/tex/generic/babel/switch.def File: switch.def 2018/02/14 3.18 Babel switching mechanism ) (/usr/share/texlive/texmf-dist/tex/generic/babel-french/french.ldf Language: french 2018/02/04 v3.4b French support from the babel system (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.def File: babel.def 2018/02/14 3.18 Babel common definitions \babel@savecnt=\count89 \U@D=\dimen103 (/usr/share/texlive/texmf-dist/tex/generic/babel/txtbabel.def) \bbl@dirlevel=\count90 ) \l@acadian = a dialect from \language\l@french \FB@nonchar=\count91 Package babel Info: Making : an active character on input line 411. Package babel Info: Making ; an active character on input line 412. Package babel Info: Making ! an active character on input line 413. Package babel Info: Making ? an active character on input line 414. \FBguill@level=\count92 \FB@everypar=\toks16 \FB@Mht=\dimen104 \mc@charclass=\count93 \mc@charfam=\count94 \mc@charslot=\count95 \std@mcc=\count96 \dec@mcc=\count97 \c@FBcaption@count=\count98 \listindentFB=\skip45 \descindentFB=\skip46 \labelwidthFB=\skip47 \leftmarginFB=\skip48 \parindentFFN=\dimen105 \FBfnindent=\skip49 ) (/usr/share/texlive/texmf-dist/tex/generic/babel-german/german.ldf Language: german 2016/11/02 v2.9 German support for babel (traditional orthogra phy) (/usr/share/texlive/texmf-dist/tex/generic/babel-german/germanb.ldf Language: germanb 2016/11/02 v2.9 German support for babel (traditional orthogr aphy) Package babel Info: Making " an active character on input line 139. 
)) (/usr/share/texlive/texmf-dist/tex/generic/babel-english/english.ldf Language: english 2017/06/06 v3.3r English support from the babel system \l@canadian = a dialect from \language\l@american \l@australian = a dialect from \language\l@british \l@newzealand = a dialect from \language\l@british )) (/usr/share/texlive/texmf-dist/tex/latex/carlisle/scalefnt.sty) (/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty Package: keyval 2014/10/28 v1.15 key=value parser (DPC) \KV@toks@=\toks17 ) (/usr/share/texmf/tex/latex/lm/lmodern.sty Package: lmodern 2009/10/30 v1.6 Latin Modern Fonts LaTeX Font Info: Overwriting symbol font `operators' in version `normal' (Font) OT1/cmr/m/n --> OT1/lmr/m/n on input line 22. LaTeX Font Info: Overwriting symbol font `letters' in version `normal' (Font) OML/cmm/m/it --> OML/lmm/m/it on input line 23. LaTeX Font Info: Overwriting symbol font `symbols' in version `normal' (Font) OMS/cmsy/m/n --> OMS/lmsy/m/n on input line 24. LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal' (Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 25. LaTeX Font Info: Overwriting symbol font `operators' in version `bold' (Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 26. LaTeX Font Info: Overwriting symbol font `letters' in version `bold' (Font) OML/cmm/b/it --> OML/lmm/b/it on input line 27. LaTeX Font Info: Overwriting symbol font `symbols' in version `bold' (Font) OMS/cmsy/b/n --> OMS/lmsy/b/n on input line 28. LaTeX Font Info: Overwriting symbol font `largesymbols' in version `bold' (Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 29. LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal' (Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 31. LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal' (Font) OT1/cmss/m/n --> OT1/lmss/m/n on input line 32. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal' (Font) OT1/cmr/m/it --> OT1/lmr/m/it on input line 33. 
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal' (Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 34. LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `bold' (Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 35. LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold' (Font) OT1/cmss/bx/n --> OT1/lmss/bx/n on input line 36. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' (Font) OT1/cmr/bx/it --> OT1/lmr/bx/it on input line 37. LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold' (Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 38. ) (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier.sty Package: fourier 2005/01/01 1.4 fourier-GUTenberg package Now handling font encoding FML ... ... no UTF-8 mapping file for font encoding FML Now handling font encoding FMS ... ... no UTF-8 mapping file for font encoding FMS Now handling font encoding FMX ... ... no UTF-8 mapping file for font encoding FMX (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty Package: fontenc 2017/04/05 v2.0i Standard LaTeX package (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file LaTeX Font Info: Redeclaring font encoding T1 on input line 48. )) (/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty Package: textcomp 2017/04/05 v2.0i Standard LaTeX package Package textcomp Info: Sub-encoding information: (textcomp) 5 = only ISO-Adobe without \textcurrency (textcomp) 4 = 5 + \texteuro (textcomp) 3 = 4 + \textohm (textcomp) 2 = 3 + \textestimated + \textcurrency (textcomp) 1 = TS1 - \textcircled - \t (textcomp) 0 = TS1 (full) (textcomp) Font families with sub-encoding setting implement (textcomp) only a restricted character set as indicated. (textcomp) Family '?' is the default used for unknown fonts. (textcomp) See the documentation for details. Package textcomp Info: Setting ? sub-encoding to TS1/1 on input line 79. 
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file Now handling font encoding TS1 ... ... processing UTF-8 mapping file for font encoding TS1 (/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu File: ts1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A2 (decimal 162) defining Unicode char U+00A3 (decimal 163) defining Unicode char U+00A4 (decimal 164) defining Unicode char U+00A5 (decimal 165) defining Unicode char U+00A6 (decimal 166) defining Unicode char U+00A7 (decimal 167) defining Unicode char U+00A8 (decimal 168) defining Unicode char U+00A9 (decimal 169) defining Unicode char U+00AA (decimal 170) defining Unicode char U+00AC (decimal 172) defining Unicode char U+00AE (decimal 174) defining Unicode char U+00AF (decimal 175) defining Unicode char U+00B0 (decimal 176) defining Unicode char U+00B1 (decimal 177) defining Unicode char U+00B2 (decimal 178) defining Unicode char U+00B3 (decimal 179) defining Unicode char U+00B4 (decimal 180) defining Unicode char U+00B5 (decimal 181) defining Unicode char U+00B6 (decimal 182) defining Unicode char U+00B7 (decimal 183) defining Unicode char U+00B9 (decimal 185) defining Unicode char U+00BA (decimal 186) defining Unicode char U+00BC (decimal 188) defining Unicode char U+00BD (decimal 189) defining Unicode char U+00BE (decimal 190) defining Unicode char U+00D7 (decimal 215) defining Unicode char U+00F7 (decimal 247) defining Unicode char U+0192 (decimal 402) defining Unicode char U+02C7 (decimal 711) defining Unicode char U+02D8 (decimal 728) defining Unicode char U+02DD (decimal 733) defining Unicode char U+0E3F (decimal 3647) defining Unicode char U+2016 (decimal 8214) defining Unicode char U+2020 (decimal 8224) defining Unicode char U+2021 (decimal 8225) defining Unicode char U+2022 (decimal 8226) defining Unicode char U+2030 (decimal 8240) defining Unicode char U+2031 (decimal 8241) defining Unicode char 
U+203B (decimal 8251) defining Unicode char U+203D (decimal 8253) defining Unicode char U+2044 (decimal 8260) defining Unicode char U+204E (decimal 8270) defining Unicode char U+2052 (decimal 8274) defining Unicode char U+20A1 (decimal 8353) defining Unicode char U+20A4 (decimal 8356) defining Unicode char U+20A6 (decimal 8358) defining Unicode char U+20A9 (decimal 8361) defining Unicode char U+20AB (decimal 8363) defining Unicode char U+20AC (decimal 8364) defining Unicode char U+20B1 (decimal 8369) defining Unicode char U+2103 (decimal 8451) defining Unicode char U+2116 (decimal 8470) defining Unicode char U+2117 (decimal 8471) defining Unicode char U+211E (decimal 8478) defining Unicode char U+2120 (decimal 8480) defining Unicode char U+2122 (decimal 8482) defining Unicode char U+2126 (decimal 8486) defining Unicode char U+2127 (decimal 8487) defining Unicode char U+212E (decimal 8494) defining Unicode char U+2190 (decimal 8592) defining Unicode char U+2191 (decimal 8593) defining Unicode char U+2192 (decimal 8594) defining Unicode char U+2193 (decimal 8595) defining Unicode char U+2329 (decimal 9001) defining Unicode char U+232A (decimal 9002) defining Unicode char U+2422 (decimal 9250) defining Unicode char U+25E6 (decimal 9702) defining Unicode char U+25EF (decimal 9711) defining Unicode char U+266A (decimal 9834) )) LaTeX Info: Redefining \oldstylenums on input line 334. Package textcomp Info: Setting cmr sub-encoding to TS1/0 on input line 349. Package textcomp Info: Setting cmss sub-encoding to TS1/0 on input line 350. Package textcomp Info: Setting cmtt sub-encoding to TS1/0 on input line 351. Package textcomp Info: Setting cmvtt sub-encoding to TS1/0 on input line 352. Package textcomp Info: Setting cmbr sub-encoding to TS1/0 on input line 353. Package textcomp Info: Setting cmtl sub-encoding to TS1/0 on input line 354. Package textcomp Info: Setting ccr sub-encoding to TS1/0 on input line 355. 
Package textcomp Info: Setting ptm sub-encoding to TS1/4 on input line 356. Package textcomp Info: Setting pcr sub-encoding to TS1/4 on input line 357. Package textcomp Info: Setting phv sub-encoding to TS1/4 on input line 358. Package textcomp Info: Setting ppl sub-encoding to TS1/3 on input line 359. Package textcomp Info: Setting pag sub-encoding to TS1/4 on input line 360. Package textcomp Info: Setting pbk sub-encoding to TS1/4 on input line 361. Package textcomp Info: Setting pnc sub-encoding to TS1/4 on input line 362. Package textcomp Info: Setting pzc sub-encoding to TS1/4 on input line 363. Package textcomp Info: Setting bch sub-encoding to TS1/4 on input line 364. Package textcomp Info: Setting put sub-encoding to TS1/5 on input line 365. Package textcomp Info: Setting uag sub-encoding to TS1/5 on input line 366. Package textcomp Info: Setting ugq sub-encoding to TS1/5 on input line 367. Package textcomp Info: Setting ul8 sub-encoding to TS1/4 on input line 368. Package textcomp Info: Setting ul9 sub-encoding to TS1/4 on input line 369. Package textcomp Info: Setting augie sub-encoding to TS1/5 on input line 370. Package textcomp Info: Setting dayrom sub-encoding to TS1/3 on input line 371. Package textcomp Info: Setting dayroms sub-encoding to TS1/3 on input line 372. Package textcomp Info: Setting pxr sub-encoding to TS1/0 on input line 373. Package textcomp Info: Setting pxss sub-encoding to TS1/0 on input line 374. Package textcomp Info: Setting pxtt sub-encoding to TS1/0 on input line 375. Package textcomp Info: Setting txr sub-encoding to TS1/0 on input line 376. Package textcomp Info: Setting txss sub-encoding to TS1/0 on input line 377. Package textcomp Info: Setting txtt sub-encoding to TS1/0 on input line 378. Package textcomp Info: Setting lmr sub-encoding to TS1/0 on input line 379. Package textcomp Info: Setting lmdh sub-encoding to TS1/0 on input line 380. Package textcomp Info: Setting lmss sub-encoding to TS1/0 on input line 381. 
Package textcomp Info: Setting lmssq sub-encoding to TS1/0 on input line 382. Package textcomp Info: Setting lmvtt sub-encoding to TS1/0 on input line 383. Package textcomp Info: Setting lmtt sub-encoding to TS1/0 on input line 384. Package textcomp Info: Setting qhv sub-encoding to TS1/0 on input line 385. Package textcomp Info: Setting qag sub-encoding to TS1/0 on input line 386. Package textcomp Info: Setting qbk sub-encoding to TS1/0 on input line 387. Package textcomp Info: Setting qcr sub-encoding to TS1/0 on input line 388. Package textcomp Info: Setting qcs sub-encoding to TS1/0 on input line 389. Package textcomp Info: Setting qpl sub-encoding to TS1/0 on input line 390. Package textcomp Info: Setting qtm sub-encoding to TS1/0 on input line 391. Package textcomp Info: Setting qzc sub-encoding to TS1/0 on input line 392. Package textcomp Info: Setting qhvc sub-encoding to TS1/0 on input line 393. Package textcomp Info: Setting futs sub-encoding to TS1/4 on input line 394. Package textcomp Info: Setting futx sub-encoding to TS1/4 on input line 395. Package textcomp Info: Setting futj sub-encoding to TS1/4 on input line 396. Package textcomp Info: Setting hlh sub-encoding to TS1/3 on input line 397. Package textcomp Info: Setting hls sub-encoding to TS1/3 on input line 398. Package textcomp Info: Setting hlst sub-encoding to TS1/3 on input line 399. Package textcomp Info: Setting hlct sub-encoding to TS1/5 on input line 400. Package textcomp Info: Setting hlx sub-encoding to TS1/5 on input line 401. Package textcomp Info: Setting hlce sub-encoding to TS1/5 on input line 402. Package textcomp Info: Setting hlcn sub-encoding to TS1/5 on input line 403. Package textcomp Info: Setting hlcw sub-encoding to TS1/5 on input line 404. Package textcomp Info: Setting hlcf sub-encoding to TS1/5 on input line 405. Package textcomp Info: Setting pplx sub-encoding to TS1/3 on input line 406. Package textcomp Info: Setting pplj sub-encoding to TS1/3 on input line 407. 
Package textcomp Info: Setting ptmx sub-encoding to TS1/4 on input line 408. Package textcomp Info: Setting ptmj sub-encoding to TS1/4 on input line 409. ) (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier-orns.sty Package: fourier-orns 2004/01/30 1.1 fourier-ornaments package ) LaTeX Font Info: Redeclaring symbol font `operators' on input line 50. LaTeX Font Info: Encoding `OT1' has changed to `T1' for symbol font (Font) `operators' in the math version `normal' on input line 50. LaTeX Font Info: Overwriting symbol font `operators' in version `normal' (Font) OT1/lmr/m/n --> T1/futs/m/n on input line 50. LaTeX Font Info: Encoding `OT1' has changed to `T1' for symbol font (Font) `operators' in the math version `bold' on input line 50. LaTeX Font Info: Overwriting symbol font `operators' in version `bold' (Font) OT1/lmr/bx/n --> T1/futs/m/n on input line 50. LaTeX Font Info: Overwriting symbol font `operators' in version `bold' (Font) T1/futs/m/n --> T1/futs/b/n on input line 51. LaTeX Font Info: Redeclaring symbol font `letters' on input line 59. LaTeX Font Info: Encoding `OML' has changed to `FML' for symbol font (Font) `letters' in the math version `normal' on input line 59. LaTeX Font Info: Overwriting symbol font `letters' in version `normal' (Font) OML/lmm/m/it --> FML/futmi/m/it on input line 59. LaTeX Font Info: Encoding `OML' has changed to `FML' for symbol font (Font) `letters' in the math version `bold' on input line 59. LaTeX Font Info: Overwriting symbol font `letters' in version `bold' (Font) OML/lmm/b/it --> FML/futmi/m/it on input line 59. \symotherletters=\mathgroup4 LaTeX Font Info: Overwriting symbol font `letters' in version `bold' (Font) FML/futmi/m/it --> FML/futmi/b/it on input line 61. LaTeX Font Info: Overwriting symbol font `otherletters' in version `bold' (Font) FML/futm/m/it --> FML/futm/b/it on input line 62. LaTeX Font Info: Redeclaring math symbol \Gamma on input line 63. 
LaTeX Font Info: Redeclaring math symbol \Delta on input line 64. LaTeX Font Info: Redeclaring math symbol \Theta on input line 65. LaTeX Font Info: Redeclaring math symbol \Lambda on input line 66. LaTeX Font Info: Redeclaring math symbol \Xi on input line 67. LaTeX Font Info: Redeclaring math symbol \Pi on input line 68. LaTeX Font Info: Redeclaring math symbol \Sigma on input line 69. LaTeX Font Info: Redeclaring math symbol \Upsilon on input line 70. LaTeX Font Info: Redeclaring math symbol \Phi on input line 71. LaTeX Font Info: Redeclaring math symbol \Psi on input line 72. LaTeX Font Info: Redeclaring math symbol \Omega on input line 73. LaTeX Font Info: Redeclaring symbol font `symbols' on input line 113. LaTeX Font Info: Encoding `OMS' has changed to `FMS' for symbol font (Font) `symbols' in the math version `normal' on input line 113. LaTeX Font Info: Overwriting symbol font `symbols' in version `normal' (Font) OMS/lmsy/m/n --> FMS/futm/m/n on input line 113. LaTeX Font Info: Encoding `OMS' has changed to `FMS' for symbol font (Font) `symbols' in the math version `bold' on input line 113. LaTeX Font Info: Overwriting symbol font `symbols' in version `bold' (Font) OMS/lmsy/b/n --> FMS/futm/m/n on input line 113. LaTeX Font Info: Redeclaring symbol font `largesymbols' on input line 114. LaTeX Font Info: Encoding `OMX' has changed to `FMX' for symbol font (Font) `largesymbols' in the math version `normal' on input line 1 14. LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal' (Font) OMX/lmex/m/n --> FMX/futm/m/n on input line 114. LaTeX Font Info: Encoding `OMX' has changed to `FMX' for symbol font (Font) `largesymbols' in the math version `bold' on input line 114 . LaTeX Font Info: Overwriting symbol font `largesymbols' in version `bold' (Font) OMX/lmex/m/n --> FMX/futm/m/n on input line 114. LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 115. 
LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal' (Font) OT1/lmr/bx/n --> T1/futs/bx/n on input line 115. LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `bold' (Font) OT1/lmr/bx/n --> T1/futs/bx/n on input line 115. LaTeX Font Info: Redeclaring math alphabet \mathrm on input line 116. LaTeX Font Info: Redeclaring math alphabet \mathit on input line 117. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal' (Font) OT1/lmr/m/it --> T1/futs/m/it on input line 117. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' (Font) OT1/lmr/bx/it --> T1/futs/m/it on input line 117. LaTeX Font Info: Redeclaring math alphabet \mathcal on input line 118. LaTeX Font Info: Redeclaring math symbol \parallel on input line 134. LaTeX Font Info: Redeclaring math symbol \hbar on input line 148. LaTeX Font Info: Redeclaring math symbol \varkappa on input line 186. LaTeX Font Info: Redeclaring math symbol \varvarrho on input line 187. LaTeX Font Info: Redeclaring math delimiter \Vert on input line 210. LaTeX Font Info: Redeclaring math delimiter \vert on input line 215. LaTeX Font Info: Redeclaring math delimiter \Downarrow on input line 225. LaTeX Font Info: Redeclaring math delimiter \backslash on input line 227. LaTeX Font Info: Redeclaring math delimiter \rangle on input line 229. LaTeX Font Info: Redeclaring math delimiter \langle on input line 231. LaTeX Font Info: Redeclaring math delimiter \rbrace on input line 233. LaTeX Font Info: Redeclaring math delimiter \lbrace on input line 235. LaTeX Font Info: Redeclaring math delimiter \rceil on input line 237. LaTeX Font Info: Redeclaring math delimiter \lceil on input line 239. LaTeX Font Info: Redeclaring math delimiter \rfloor on input line 241. LaTeX Font Info: Redeclaring math delimiter \lfloor on input line 243. LaTeX Font Info: Redeclaring math accent \acute on input line 247. LaTeX Font Info: Redeclaring math accent \grave on input line 248. 
LaTeX Font Info: Redeclaring math accent \ddot on input line 249. LaTeX Font Info: Redeclaring math accent \tilde on input line 250. LaTeX Font Info: Redeclaring math accent \bar on input line 251. LaTeX Font Info: Redeclaring math accent \breve on input line 252. LaTeX Font Info: Redeclaring math accent \check on input line 253. LaTeX Font Info: Redeclaring math accent \hat on input line 254. LaTeX Font Info: Redeclaring math accent \dot on input line 255. LaTeX Font Info: Redeclaring math accent \mathring on input line 256. \symUfutm=\mathgroup5 ) (/usr/share/texlive/texmf-dist/tex/latex/setspace/setspace.sty Package: setspace 2011/12/19 v6.7a set line spacing ) (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphicx.sty Package: graphicx 2017/06/01 v1.1a Enhanced LaTeX Graphics (DPC,SPQR) (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphics.sty Package: graphics 2017/06/25 v1.2c Standard LaTeX Graphics (DPC,SPQR) (/usr/share/texlive/texmf-dist/tex/latex/graphics/trig.sty Package: trig 2016/01/03 v1.10 sin cos tan (DPC) ) (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/graphics.cfg File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration ) Package graphics Info: Driver file: pdftex.def on input line 99. (/usr/share/texlive/texmf-dist/tex/latex/graphics-def/pdftex.def File: pdftex.def 2018/01/08 v1.0l Graphics/color driver for pdftex )) \Gin@req@height=\dimen106 \Gin@req@width=\dimen107 ) (/usr/share/texlive/texmf-dist/tex/latex/xcolor/xcolor.sty Package: xcolor 2016/05/11 v2.12 LaTeX color extensions (UK) (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/color.cfg File: color.cfg 2016/01/02 v1.6 sample color configuration ) Package xcolor Info: Driver file: pdftex.def on input line 225. Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1348. Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1352. Package xcolor Info: Model `RGB' extended on input line 1364. 
Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1366. Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1367. Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1368. Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1369. Package xcolor Info: Model `Gray' substituted by `gray' on input line 1370. Package xcolor Info: Model `wave' substituted by `hsb' on input line 1371. ) (/usr/share/texlive/texmf-dist/tex/latex/subfig/subfig.sty Package: subfig 2005/06/28 ver: 1.3 subfig package (/usr/share/texlive/texmf-dist/tex/latex/caption/caption.sty Package: caption 2016/02/21 v3.3-144 Customizing captions (AR) (/usr/share/texlive/texmf-dist/tex/latex/caption/caption3.sty Package: caption3 2016/05/22 v1.7-166 caption3 kernel (AR) Package caption3 Info: TeX engine: e-TeX on input line 67. \captionmargin=\dimen108 \captionmargin@=\dimen109 \captionwidth=\dimen110 \caption@tempdima=\dimen111 \caption@indent=\dimen112 \caption@parindent=\dimen113 \caption@hangindent=\dimen114 ) \c@ContinuedFloat=\count99 ) \c@KVtest=\count100 \sf@farskip=\skip50 \sf@captopadj=\dimen115 \sf@capskip=\skip51 \sf@nearskip=\skip52 \c@subfigure=\count101 \c@subfigure@save=\count102 \c@lofdepth=\count103 \c@subtable=\count104 \c@subtable@save=\count105 \c@lotdepth=\count106 \sf@top=\skip53 \sf@bottom=\skip54 ) (/usr/share/texlive/texmf-dist/tex/latex/booktabs/booktabs.sty Package: booktabs 2016/04/27 v1.618033 publication quality tables \heavyrulewidth=\dimen116 \lightrulewidth=\dimen117 \cmidrulewidth=\dimen118 \belowrulesep=\dimen119 \belowbottomsep=\dimen120 \aboverulesep=\dimen121 \abovetopsep=\dimen122 \cmidrulesep=\dimen123 \cmidrulekern=\dimen124 \defaultaddspace=\dimen125 \@cmidla=\count107 \@cmidlb=\count108 \@aboverulesep=\dimen126 \@belowrulesep=\dimen127 \@thisruleclass=\count109 \@lastruleclass=\count110 \@thisrulewidth=\dimen128 ) (/usr/share/texlive/texmf-dist/tex/latex/lipsum/lipsum.sty Package: lipsum 
2014/07/27 v1.3 150 paragraphs of Lorem Ipsum dummy text \c@lips@count=\count111 ) (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.sty Package: microtype 2018/01/14 v2.7a Micro-typographical refinements (RS) \MT@toks=\toks18 \MT@count=\count112 LaTeX Info: Redefining \textls on input line 793. \MT@outer@kern=\dimen129 LaTeX Info: Redefining \textmicrotypecontext on input line 1339. \MT@listname@count=\count113 (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype-pdftex.def File: microtype-pdftex.def 2018/01/14 v2.7a Definitions specific to pdftex (RS) LaTeX Info: Redefining \lsstyle on input line 913. LaTeX Info: Redefining \lslig on input line 913. \MT@outer@space=\skip55 ) Package microtype Info: Loading configuration file microtype.cfg. (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.cfg File: microtype.cfg 2018/01/14 v2.7a microtype main configuration file (RS) )) (/usr/share/texlive/texmf-dist/tex/latex/url/url.sty \Urlmuskip=\muskip10 Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. 
) (/usr/share/texlive/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty Package: fancyhdr 2017/06/30 v3.9a Extensive control of page headers and footer s \f@nch@headwidth=\skip56 \f@nch@O@elh=\skip57 \f@nch@O@erh=\skip58 \f@nch@O@olh=\skip59 \f@nch@O@orh=\skip60 \f@nch@O@elf=\skip61 \f@nch@O@erf=\skip62 \f@nch@O@olf=\skip63 \f@nch@O@orf=\skip64 ) (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.sty \lst@mode=\count114 \lst@gtempboxa=\box26 \lst@token=\toks19 \lst@length=\count115 \lst@currlwidth=\dimen130 \lst@column=\count116 \lst@pos=\count117 \lst@lostspace=\dimen131 \lst@width=\dimen132 \lst@newlines=\count118 \lst@lineno=\count119 \lst@maxwidth=\dimen133 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz) \c@lstnumber=\count120 \lst@skipnumbers=\count121 \lst@framebox=\box27 ) (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.cfg File: listings.cfg 2015/06/04 1.6 listings configuration )) Package: listings 2015/06/04 1.6 (Carsten Heinz) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty File: lstlang1.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty File: lstlang2.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty File: lstlang3.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty File: lstlang1.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty File: lstlang2.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty File: lstlang3.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty File: lstlang1.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty File: lstlang2.sty 2015/06/04 1.6 listings language 
file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty File: lstlang3.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz) ) (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hyperref.sty Package: hyperref 2018/02/06 v6.86b Hypertext links for LaTeX (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-hyperref.sty Package: hobsub-hyperref 2016/05/16 v1.14 Bundle oberdiek, subset hyperref (HO) (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-generic.sty Package: hobsub-generic 2016/05/16 v1.14 Bundle oberdiek, subset generic (HO) Package: hobsub 2016/05/16 v1.14 Construct package bundles (HO) Package: infwarerr 2016/05/16 v1.4 Providing info/warning/error messages (HO) Package: ltxcmds 2016/05/16 v1.23 LaTeX kernel commands for general use (HO) Package: ifluatex 2016/05/16 v1.4 Provides the ifluatex switch (HO) Package ifluatex Info: LuaTeX not detected. Package: ifvtex 2016/05/16 v1.6 Detect VTeX and its facilities (HO) Package ifvtex Info: VTeX not detected. Package: intcalc 2016/05/16 v1.2 Expandable calculations with integers (HO) Package: ifpdf 2017/03/15 v3.2 Provides the ifpdf switch Package: etexcmds 2016/05/16 v1.6 Avoid name clashes with e-TeX commands (HO) Package etexcmds Info: Could not find \expanded. (etexcmds) That can mean that you are not using pdfTeX 1.50 or (etexcmds) that some package has redefined \expanded. (etexcmds) In the latter case, load this package earlier. Package: kvsetkeys 2016/05/16 v1.17 Key value parser (HO) Package: kvdefinekeys 2016/05/16 v1.4 Define keys (HO) Package: pdftexcmds 2018/01/21 v0.26 Utility functions of pdfTeX for LuaTeX (HO ) Package pdftexcmds Info: LuaTeX not detected. Package pdftexcmds Info: \pdf@primitive is available. Package pdftexcmds Info: \pdf@ifprimitive is available. Package pdftexcmds Info: \pdfdraftmode found. 
Package: pdfescape 2016/05/16 v1.14 Implements pdfTeX's escape features (HO) Package: bigintcalc 2016/05/16 v1.4 Expandable calculations on big integers (HO ) Package: bitset 2016/05/16 v1.2 Handle bit-vector datatype (HO) Package: uniquecounter 2016/05/16 v1.3 Provide unlimited unique counter (HO) ) Package hobsub Info: Skipping package `hobsub' (already loaded). Package: letltxmacro 2016/05/16 v1.5 Let assignment for LaTeX macros (HO) Package: hopatch 2016/05/16 v1.3 Wrapper for package hooks (HO) Package: xcolor-patch 2016/05/16 xcolor patch Package: atveryend 2016/05/16 v1.9 Hooks at the very end of document (HO) Package atveryend Info: \enddocument detected (standard20110627). Package: atbegshi 2016/06/09 v1.18 At begin shipout hook (HO) Package: refcount 2016/05/16 v3.5 Data extraction from label references (HO) Package: hycolor 2016/05/16 v1.8 Color options for hyperref/bookmark (HO) ) (/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/auxhook.sty Package: auxhook 2016/05/16 v1.4 Hooks for auxiliary files (HO) ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/kvoptions.sty Package: kvoptions 2016/05/16 v3.12 Key value format for package options (HO) ) \@linkdim=\dimen134 \Hy@linkcounter=\count122 \Hy@pagecounter=\count123 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/pd1enc.def File: pd1enc.def 2018/02/06 v6.86b Hyperref: PDFDocEncoding definition (HO) Now handling font encoding PD1 ... ... no UTF-8 mapping file for font encoding PD1 ) \Hy@SavedSpaceFactor=\count124 (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/hyperref.cfg File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive ) Package hyperref Info: Hyper figures OFF on input line 4509. Package hyperref Info: Link nesting OFF on input line 4514. Package hyperref Info: Hyper index ON on input line 4517. 
Package hyperref Info: Plain pages OFF on input line 4524. Package hyperref Info: Backreferencing OFF on input line 4529. Package hyperref Info: Implicit mode ON; LaTeX internals redefined. Package hyperref Info: Bookmarks ON on input line 4762. \c@Hy@tempcnt=\count125 LaTeX Info: Redefining \url on input line 5115. \XeTeXLinkMargin=\dimen135 \Fld@menulength=\count126 \Field@Width=\dimen136 \Fld@charsize=\dimen137 Package hyperref Info: Hyper figures OFF on input line 6369. Package hyperref Info: Link nesting OFF on input line 6374. Package hyperref Info: Hyper index ON on input line 6377. Package hyperref Info: backreferencing OFF on input line 6384. Package hyperref Info: Link coloring OFF on input line 6389. Package hyperref Info: Link coloring with OCG OFF on input line 6394. Package hyperref Info: PDF/A mode OFF on input line 6399. LaTeX Info: Redefining \ref on input line 6439. LaTeX Info: Redefining \pageref on input line 6443. \Hy@abspage=\count127 \c@Item=\count128 \c@Hfootnote=\count129 ) Package hyperref Info: Driver (autodetected): hpdftex. (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hpdftex.def File: hpdftex.def 2018/02/06 v6.86b Hyperref driver for pdfTeX \Fld@listcount=\count130 \c@bookmark@seq@number=\count131 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty Package: rerunfilecheck 2016/05/16 v1.8 Rerun checks for auxiliary files (HO) Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2 82. ) \Hy@SectionHShift=\skip65 ) Package hyperref Info: Option `colorlinks' set `true' on input line 105. 
(/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pdfpages.sty Package: pdfpages 2017/10/31 v0.5l Insert pages of external PDF documents (AM) (/usr/share/texlive/texmf-dist/tex/latex/base/ifthen.sty Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC) ) (/usr/share/texlive/texmf-dist/tex/latex/tools/calc.sty Package: calc 2014/10/28 v4.3 Infix arithmetic (KKT,FJ) \calc@Acount=\count132 \calc@Bcount=\count133 \calc@Adimen=\dimen138 \calc@Bdimen=\dimen139 \calc@Askip=\skip66 \calc@Bskip=\skip67 LaTeX Info: Redefining \setlength on input line 80. LaTeX Info: Redefining \addtolength on input line 81. \calc@Ccount=\count134 \calc@Cskip=\skip68 ) (/usr/share/texlive/texmf-dist/tex/latex/eso-pic/eso-pic.sty Package: eso-pic 2015/07/21 v2.0g eso-pic (RN) ) \AM@pagewidth=\dimen140 \AM@pageheight=\dimen141 (/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pppdftex.def File: pppdftex.def 2017/10/31 v0.5l Pdfpages driver for pdfTeX (AM) ) \AM@pagebox=\box28 \AM@global@opts=\toks20 \AM@toc@title=\toks21 \c@AM@survey=\count135 \AM@templatesizebox=\box29 ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bookmark.sty Package: bookmark 2016/05/17 v1.26 PDF bookmarks (HO) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bkm-pdftex.def File: bkm-pdftex.def 2016/05/17 v1.26 bookmark driver for pdfTeX (HO) \BKM@id=\count136 )) (/usr/share/texlive/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex \pgfutil@everybye=\toks22 \pgfutil@tempdima=\dimen142 \pgfutil@tempdimb=\dimen143 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common-lists.t ex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def \pgfutil@abb=\box30 (/usr/share/texlive/texmf-dist/tex/latex/ms/everyshi.sty Package: everyshi 2001/05/15 v3.00 EveryShipout Package 
(MS) )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex Package: pgfrcs 2015/08/07 v3.0.1a (rcs-revision 1.31) )) Package: pgf 2015/08/07 v3.0.1a (rcs-revision 1.15) (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex Package: pgfsys 2014/07/09 v3.0.1a (rcs-revision 1.48) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex \pgfkeys@pathtoks=\toks23 \pgfkeys@temptoks=\toks24 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeysfiltered.code.t ex \pgfkeys@tmptoks=\toks25 )) \pgf@x=\dimen144 \pgf@y=\dimen145 \pgf@xa=\dimen146 \pgf@ya=\dimen147 \pgf@xb=\dimen148 \pgf@yb=\dimen149 \pgf@xc=\dimen150 \pgf@yc=\dimen151 \w@pgf@writea=\write3 \r@pgf@reada=\read1 \c@pgf@counta=\count137 \c@pgf@countb=\count138 \c@pgf@countc=\count139 \c@pgf@countd=\count140 \t@pgf@toka=\toks26 \t@pgf@tokb=\toks27 \t@pgf@tokc=\toks28 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg File: pgf.cfg 2008/05/14 (rcs-revision 1.7) ) Driver file for pgf: pgfsys-pdftex.def (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def File: pgfsys-pdftex.def 2014/10/11 (rcs-revision 1.35) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.de f File: pgfsys-common-pdf.def 2013/10/10 (rcs-revision 1.13) ))) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code. tex File: pgfsyssoftpath.code.tex 2013/09/09 (rcs-revision 1.9) \pgfsyssoftpath@smallbuffer@items=\count141 \pgfsyssoftpath@bigbuffer@items=\count142 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code. 
tex File: pgfsysprotocol.code.tex 2006/10/16 (rcs-revision 1.4) )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex Package: pgfcore 2010/04/11 v3.0.1a (rcs-revision 1.7) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex \pgfmath@dimen=\dimen152 \pgfmath@count=\count143 \pgfmath@box=\box31 \pgfmath@toks=\toks29 \pgfmath@stack@operand=\toks30 \pgfmath@stack@operation=\toks31 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code .tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonomet ric.code.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.cod e.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison .code.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code. tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code .tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code. 
tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerari thmetics.code.tex))) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex \c@pgfmathroundto@lastzeros=\count144 )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.te x File: pgfcorepoints.code.tex 2013/10/07 (rcs-revision 1.27) \pgf@picminx=\dimen153 \pgf@picmaxx=\dimen154 \pgf@picminy=\dimen155 \pgf@picmaxy=\dimen156 \pgf@pathminx=\dimen157 \pgf@pathmaxx=\dimen158 \pgf@pathminy=\dimen159 \pgf@pathmaxy=\dimen160 \pgf@xx=\dimen161 \pgf@xy=\dimen162 \pgf@yx=\dimen163 \pgf@yy=\dimen164 \pgf@zx=\dimen165 \pgf@zy=\dimen166 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct. code.tex File: pgfcorepathconstruct.code.tex 2013/10/07 (rcs-revision 1.29) \pgf@path@lastx=\dimen167 \pgf@path@lasty=\dimen168 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code .tex File: pgfcorepathusage.code.tex 2014/11/02 (rcs-revision 1.24) \pgf@shorten@end@additional=\dimen169 \pgf@shorten@start@additional=\dimen170 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.te x File: pgfcorescopes.code.tex 2015/05/08 (rcs-revision 1.46) \pgfpic=\box32 \pgf@hbox=\box33 \pgf@layerbox@main=\box34 \pgf@picture@serial@count=\count145 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.c ode.tex File: pgfcoregraphicstate.code.tex 2014/11/02 (rcs-revision 1.12) \pgflinewidth=\dimen171 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformation s.code.tex File: pgfcoretransformations.code.tex 2015/08/07 (rcs-revision 1.20) \pgf@pt@x=\dimen172 \pgf@pt@y=\dimen173 \pgf@pt@temp=\dimen174 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex File: pgfcorequick.code.tex 2008/10/09 (rcs-revision 1.3) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.t ex File: pgfcoreobjects.code.tex 
2006/10/11 (rcs-revision 1.2) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing .code.tex File: pgfcorepathprocessing.code.tex 2013/09/09 (rcs-revision 1.9) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.te x File: pgfcorearrows.code.tex 2015/05/14 (rcs-revision 1.43) \pgfarrowsep=\dimen175 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex File: pgfcoreshade.code.tex 2013/07/15 (rcs-revision 1.15) \pgf@max=\dimen176 \pgf@sys@shading@range@num=\count146 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex File: pgfcoreimage.code.tex 2013/07/15 (rcs-revision 1.18) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code. tex File: pgfcoreexternal.code.tex 2014/07/09 (rcs-revision 1.21) \pgfexternal@startupbox=\box35 )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.te x File: pgfcorelayers.code.tex 2013/07/18 (rcs-revision 1.7) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.c ode.tex File: pgfcoretransparency.code.tex 2013/09/30 (rcs-revision 1.5) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code. 
tex File: pgfcorepatterns.code.tex 2013/11/07 (rcs-revision 1.5) ))) (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex File: pgfmoduleshapes.code.tex 2014/03/21 (rcs-revision 1.35) \pgfnodeparttextbox=\box36 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex File: pgfmoduleplot.code.tex 2015/08/03 (rcs-revision 1.13) ) (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65 .sty Package: pgfcomp-version-0-65 2007/07/03 v3.0.1a (rcs-revision 1.7) \pgf@nodesepstart=\dimen177 \pgf@nodesepend=\dimen178 ) (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18 .sty Package: pgfcomp-version-1-18 2007/07/23 v3.0.1a (rcs-revision 1.1) )) (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (/usr/share/texlive/texmf-dist/tex/latex/pgf/math/pgfmath.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex Package: pgffor 2013/12/13 v3.0.1a (rcs-revision 1.25) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex) \pgffor@iter=\dimen179 \pgffor@skip=\dimen180 \pgffor@stack=\toks32 \pgffor@toks=\toks33 )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex Package: tikz 2015/08/07 v3.0.1a (rcs-revision 1.151) (/usr/share/texlive/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers .code.tex File: pgflibraryplothandlers.code.tex 2013/08/31 v3.0.1a (rcs-revision 1.20) \pgf@plot@mark@count=\count147 \pgfplotmarksize=\dimen181 ) \tikz@lastx=\dimen182 \tikz@lasty=\dimen183 \tikz@lastxsaved=\dimen184 \tikz@lastysaved=\dimen185 \tikzleveldistance=\dimen186 \tikzsiblingdistance=\dimen187 \tikz@figbox=\box37 \tikz@figbox@bg=\box38 \tikz@tempbox=\box39 \tikz@tempbox@bg=\box40 
\tikztreelevel=\count148 \tikznumberofchildren=\count149 \tikznumberofcurrentchild=\count150 \tikz@fig@count=\count151 (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex File: pgfmodulematrix.code.tex 2013/09/17 (rcs-revision 1.8) \pgfmatrixcurrentrow=\count152 \pgfmatrixcurrentcolumn=\count153 \pgf@matrix@numberofcolumns=\count154 ) \tikz@expandcount=\count155 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik zlibrarytopaths.code.tex File: tikzlibrarytopaths.code.tex 2008/06/17 v3.0.1a (rcs-revision 1.2) ))) (/usr/share/texlive/texmf-dist/tex/latex/titlesec/titlesec.sty Package: titlesec 2016/03/21 v2.10.2 Sectioning titles \ttl@box=\box41 \beforetitleunit=\skip69 \aftertitleunit=\skip70 \ttl@plus=\dimen188 \ttl@minus=\dimen189 \ttl@toksa=\toks34 \titlewidth=\dimen190 \titlewidthlast=\dimen191 \titlewidthfirst=\dimen192 ) (/usr/share/texlive/texmf-dist/tex/latex/titlesec/ttlkeys.def File: ttlkeys.def 2016/03/15 \c@ttlp@side=\count156 \ttlp@side=\count157 ) \c@myparts=\count158 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty Package: amsmath 2017/09/02 v2.17a AMS math features \@mathmargin=\skip71 For additional information on amsmath, use the `?' option. (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty Package: amstext 2000/06/29 v2.01 AMS text (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty File: amsgen.sty 1999/11/30 v2.0 generic functions \@emptytoks=\toks35 \ex@=\dimen193 )) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty Package: amsbsy 1999/11/29 v1.2d Bold Symbols \pmbraise@=\dimen194 ) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty Package: amsopn 2016/03/08 v2.02 operator names ) \inf@bad=\count159 LaTeX Info: Redefining \frac on input line 213. \uproot@=\count160 \leftroot@=\count161 LaTeX Info: Redefining \overline on input line 375. \classnum@=\count162 \DOTSCASE@=\count163 LaTeX Info: Redefining \ldots on input line 472. 
LaTeX Info: Redefining \dots on input line 475. LaTeX Info: Redefining \cdots on input line 596. \Mathstrutbox@=\box42 \strutbox@=\box43 \big@size=\dimen195 LaTeX Font Info: Redeclaring font encoding OML on input line 712. LaTeX Font Info: Redeclaring font encoding OMS on input line 713. \macc@depth=\count164 \c@MaxMatrixCols=\count165 \dotsspace@=\muskip11 \c@parentequation=\count166 \dspbrk@lvl=\count167 \tag@help=\toks36 \row@=\count168 \column@=\count169 \maxfields@=\count170 \andhelp@=\toks37 \eqnshift@=\dimen196 \alignsep@=\dimen197 \tagshift@=\dimen198 \tagwidth@=\dimen199 \totwidth@=\dimen256 \lineht@=\dimen257 \@envbody=\toks38 \multlinegap=\skip72 \multlinetaggap=\skip73 \mathdisplay@stack=\toks39 LaTeX Info: Redefining \[ on input line 2817. LaTeX Info: Redefining \] on input line 2818. ) (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amsfonts.sty Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support \symAMSa=\mathgroup6 \symAMSb=\mathgroup7 LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' (Font) U/euf/m/n --> U/euf/b/n on input line 106. LaTeX Font Info: Redeclaring math symbol \square on input line 141. ) (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amssymb.sty Package: amssymb 2013/01/14 v3.01 AMS font symbols LaTeX Font Info: Redeclaring math symbol \blacksquare on input line 48. LaTeX Font Info: Redeclaring math symbol \vDash on input line 60. LaTeX Font Info: Redeclaring math symbol \leftleftarrows on input line 63. LaTeX Font Info: Redeclaring math symbol \rightrightarrows on input line 64. LaTeX Font Info: Redeclaring math symbol \leqslant on input line 101. LaTeX Font Info: Redeclaring math symbol \geqslant on input line 108. LaTeX Font Info: Redeclaring math symbol \blacktriangleright on input line 1 20. LaTeX Font Info: Redeclaring math symbol \blacktriangleleft on input line 12 1. LaTeX Font Info: Redeclaring math symbol \complement on input line 165. 
LaTeX Font Info: Redeclaring math symbol \intercal on input line 166. LaTeX Font Info: Redeclaring math symbol \nleqslant on input line 181. LaTeX Font Info: Redeclaring math symbol \ngeqslant on input line 182. LaTeX Font Info: Redeclaring math symbol \varsubsetneq on input line 203. LaTeX Font Info: Redeclaring math symbol \subsetneqq on input line 207. LaTeX Font Info: Redeclaring math symbol \nparallel on input line 215. LaTeX Font Info: Redeclaring math symbol \nvDash on input line 221. LaTeX Font Info: Redeclaring math symbol \nexists on input line 235. LaTeX Font Info: Redeclaring math symbol \smallsetminus on input line 251. LaTeX Font Info: Redeclaring math symbol \curvearrowleft on input line 257. LaTeX Font Info: Redeclaring math symbol \curvearrowright on input line 258. LaTeX Font Info: Redeclaring math symbol \varkappa on input line 260. LaTeX Font Info: Redeclaring math symbol \hslash on input line 262. ) (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mathtools.sty Package: mathtools 2018/01/08 v1.21 mathematical typesetting tools (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mhsetup.sty Package: mhsetup 2017/03/31 v1.3 programming setup (MH) ) LaTeX Info: Thecontrolsequence`\('isalreadyrobust on input line 129. LaTeX Info: Thecontrolsequence`\)'isalreadyrobust on input line 129. LaTeX Info: Thecontrolsequence`\['isalreadyrobust on input line 129. LaTeX Info: Thecontrolsequence`\]'isalreadyrobust on input line 129. 
\g_MT_multlinerow_int=\count171 \l_MT_multwidth_dim=\dimen258 \origjot=\skip74 \l_MT_shortvdotswithinadjustabove_dim=\dimen259 \l_MT_shortvdotswithinadjustbelow_dim=\dimen260 \l_MT_above_intertext_sep=\dimen261 \l_MT_below_intertext_sep=\dimen262 \l_MT_above_shortintertext_sep=\dimen263 \l_MT_below_shortintertext_sep=\dimen264 )) (./head/settings_custom.tex (/usr/share/texlive/texmf-dist/tex/latex/algorithm2e/algorithm2e.sty Package: algorithm2e 2017/07/18 v5.2 algorithms environments \c@AlgoLine=\count172 \algocf@hangindent=\skip75 (/usr/share/texlive/texmf-dist/tex/latex/ifoddpage/ifoddpage.sty Package: ifoddpage 2016/04/23 v1.1 Conditionals for odd/even page detection \c@checkoddpage=\count173 ) (/usr/share/texlive/texmf-dist/tex/latex/tools/xspace.sty Package: xspace 2014/10/28 v1.13 Space after command names (DPC,MH) ) (/usr/share/texlive/texmf-dist/tex/latex/relsize/relsize.sty Package: relsize 2013/03/29 ver 4.1 ) \skiptotal=\skip76 \skiplinenumber=\skip77 \skiprule=\skip78 \skiphlne=\skip79 \skiptext=\skip80 \skiplength=\skip81 \algomargin=\skip82 \skipalgocfslide=\skip83 \algowidth=\dimen265 \inoutsize=\dimen266 \inoutindent=\dimen267 \interspacetitleruled=\dimen268 \interspacealgoruled=\dimen269 \interspacetitleboxruled=\dimen270 \algocf@ruledwidth=\skip84 \algocf@inoutbox=\box44 \algocf@inputbox=\box45 \AlCapSkip=\skip85 \AlCapHSkip=\skip86 \algoskipindent=\skip87 \algocf@nlbox=\box46 \algocf@hangingbox=\box47 \algocf@untilbox=\box48 \algocf@skipuntil=\skip88 \algocf@capbox=\box49 \algocf@lcaptionbox=\skip89 \algoheightruledefault=\skip90 \algoheightrule=\skip91 \algotitleheightruledefault=\skip92 \algotitleheightrule=\skip93 \c@algocfline=\count174 \c@algocfproc=\count175 \c@algocf=\count176 \algocf@algoframe=\box50 \algocf@algobox=\box51 ) (/usr/share/texlive/texmf-dist/tex/latex/float/float.sty Package: float 2001/11/08 v1.3d Float enhancements (AL) \c@float@type=\count177 \float@exts=\toks40 \float@box=\box52 \@float@everytoks=\toks41 
\@floatcapt=\box53 ) (/usr/share/texlive/texmf-dist/tex/latex/hyperref/xr-hyper.sty Package: xr-hyper 2000/03/22 v6.00beta4 eXternal References (DPC) Package xr-hyper Warning: Load package `hyperref' after `xr-hyper'. ) (/usr/share/texlive/texmf-dist/tex/latex/makecell/makecell.sty Package: makecell 2009/08/03 V0.1e Managing of Tab Column Heads and Cells (/usr/share/texlive/texmf-dist/tex/latex/tools/array.sty Package: array 2016/10/06 v2.4d Tabular extension package (FMi) \col@sep=\dimen271 \extrarowheight=\dimen272 \NC@list=\toks42 \extratabsurround=\skip94 \backup@length=\skip95 ) \rotheadsize=\dimen273 \c@nlinenum=\count178 \TeXr@lab=\toks43 )) (./my_thesis.aux (./head/dedication.aux) (./head/acknowledgements.aux) (./head/preface.aux) (./head/abstracts.aux) (./main/ch_introduction.aux) (./main/ch_group_projects.aux) (./main/ch_encode_peaks.aux) (./main/ch_smile-seq.aux) (./main/ch_atac-seq.aux LaTeX Warning: Label `encode_peaks_algo_ndr_extend' multiply defined. ) (./tail/appendix.aux LaTeX Warning: Label `suppl_emseq_sp1_10class' multiply defined. ) (./tail/biblio.aux) (./tail/cv.aux)) \openout1 = `my_thesis.aux'. LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for FML/futm/m/it on input line 18. LaTeX Font Info: Try loading font information for FML+futm on input line 18. 
(/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutm.fd File: fmlfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futm. ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for FMS/futm/m/n on input line 18. LaTeX Font Info: Try loading font information for FMS+futm on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmsfutm.fd File: fmsfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FMS/futm. ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for FMX/futm/m/n on input line 18. LaTeX Font Info: Try loading font information for FMX+futm on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmxfutm.fd File: fmxfutm.fd futm-extension ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 18. LaTeX Font Info: Try loading font information for TS1+cmr on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd File: ts1cmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Try loading font information for T1+futs on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/t1futs.fd File: t1futs.fd 2004/03/02 Fontinst v1.926 font definitions for T1/futs. ) LaTeX Info: Redefining \degres on input line 18. LaTeX Info: Redefining \dots on input line 18. LaTeX Info: Redefining \up on input line 18. (/usr/share/texlive/texmf-dist/tex/context/base/mkii/supp-pdf.mkii [Loading MPS to PDF converter (version 2006.09.02).] 
\scratchcounter=\count179 \scratchdimen=\dimen274 \scratchbox=\box54 \nofMPsegments=\count180 \nofMParguments=\count181 \everyMPshowfont=\toks44 \MPscratchCnt=\count182 \MPscratchDim=\dimen275 \MPnumerator=\count183 \makeMPintoPDFobject=\count184 \everyMPtoPDFconversion=\toks45 ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty Package: epstopdf-base 2016/05/15 v2.6 Base part for package epstopdf (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/grfext.sty Package: grfext 2016/05/16 v1.2 Manage graphics extensions (HO) ) Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4 38. Package grfext Info: Graphics extension search list: (grfext) [.pdf,.png,.jpg,.mps,.jpeg,.jbig2,.jb2,.PDF,.PNG,.JPG,.JPE G,.JBIG2,.JB2,.eps] (grfext) \AppendGraphicsExtensions on input line 456. (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv e )) Package caption Info: Begin \AtBeginDocument code. Package caption Info: subfig package v1.3 is loaded. Package caption Info: float package is loaded. Package caption Info: hyperref package is loaded. Package caption Info: listings package is loaded. Package caption Info: End \AtBeginDocument code. LaTeX Info: Redefining \microtypecontext on input line 18. Package microtype Info: Generating PDF output. Package microtype Info: Character protrusion enabled (level 2). Package microtype Info: Using default protrusion set `alltext'. Package microtype Info: Automatic font expansion enabled (level 2), (microtype) stretch: 20, shrink: 20, step: 1, non-selected. Package microtype Info: Using default expansion set `basictext'. Package microtype Info: No adjustment of tracking. Package microtype Info: No adjustment of interword spacing. Package microtype Info: No adjustment of character kerning. Package microtype Info: Loading generic protrusion settings for font family (microtype) `futs' (encoding: T1). 
(microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. \c@lstlisting=\count185 \AtBeginShipoutBox=\box55 Package hyperref Info: Link coloring ON on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/hyperref/nameref.sty Package: nameref 2016/05/21 v2.44 Cross-referencing by name of section (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/gettitlestring.sty Package: gettitlestring 2016/05/16 v1.5 Cleanup title references (HO) ) \c@section@level=\count186 ) LaTeX Info: Redefining \ref on input line 18. LaTeX Info: Redefining \pageref on input line 18. LaTeX Info: Redefining \nameref on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/pdflscape.sty Package: pdflscape 2016/05/14 v0.11 Display of landscape pages in PDF (HO) (/usr/share/texlive/texmf-dist/tex/latex/graphics/lscape.sty Package: lscape 2000/10/22 v3.01 Landscape Pages (DPC) ) Package pdflscape Info: Auto-detected driver: pdftex on input line 81. ) ABD: EveryShipout initializing macros (./head/titlepage.tex LaTeX Font Info: Try loading font information for T1+lmss on input line 5. (/usr/share/texmf/tex/latex/lm/t1lmss.fd File: t1lmss.fd 2009/10/30 v1.6 Font defs for Latin Modern ) Package microtype Info: Loading generic protrusion settings for font family (microtype) `lmss' (encoding: T1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. LaTeX Font Info: Try loading font information for FML+futmi on input line 14 . (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutmi.fd File: fmlfutmi.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futmi. ) LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 10.07397pt on input line 14. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 7.63599pt on input line 14. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 5.51999pt on input line 14. 
LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 10.07397pt on input line 14. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 7.63599pt on input line 14. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 5.51999pt on input line 14. LaTeX Font Info: Try loading font information for U+msa on input line 14. (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsa.fd File: umsa.fd 2013/01/14 v3.01 AMS symbols A ) (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msa.cfg File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS) ) LaTeX Font Info: Try loading font information for U+msb on input line 14. (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsb.fd File: umsb.fd 2013/01/14 v3.01 AMS symbols B ) (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msb.cfg File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS) ) File: images/epfl.pdf Graphic file (type pdf) Package pdftex.def Info: images/epfl.pdf used on input line 15. (pdftex.def) Requested size: 113.81102pt x 49.4394pt. Overfull \hbox (23.99998pt too wide) in paragraph at lines 14--41 [][] [] [1 {/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} <./images/epfl.pdf>]) \openout2 = `head/dedication.aux'. (./head/dedication.tex [2 ]) [3] \openout2 = `head/acknowledgements.aux'. (./head/acknowledgements.tex [0 ] LaTeX Font Info: Font shape `T1/futs/bx/n' in size <10.95> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 1. LaTeX Font Info: Font shape `T1/futs/bx/n' in size <24.88> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 1. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 22.88956pt on input line 1. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 15.89755pt on input line 1. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 11.03998pt on input line 1. 
LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 22.88956pt on input line 1. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 15.89755pt on input line 1. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 11.03998pt on input line 1. )pdfTeX warning (ext4): destination with the same identifier (name{page.i}) has been already used, duplicate ignored \relax l.25 \include{head/acknowledgements} [1] \openout2 = `head/preface.aux'. (./head/preface.texpdfTeX warning (ext4): destination with the same identifier (name{page.ii}) has been already used, duplicate ignored \relax l.1 \cleardoublepage [2 ])pdfTeX warning (ext4): destination with the same identifier (name{page.iii}) has been already used, duplicate ignored \relax l.26 \include{head/preface} [3 ] \openout2 = `head/abstracts.aux'. (./head/abstracts.tex [4 ] Package babel Info: Redefining german shorthand "f (babel) in language on input line 18. Package babel Info: Redefining german shorthand "| (babel) in language on input line 18. Package babel Info: Redefining german shorthand "~ (babel) in language on input line 18. Package babel Info: Redefining german shorthand "f (babel) in language on input line 18. Package babel Info: Redefining german shorthand "| (babel) in language on input line 18. Package babel Info: Redefining german shorthand "~ (babel) in language on input line 18. [5 ] Package babel Info: Redefining german shorthand "f (babel) in language on input line 18. Package babel Info: Redefining german shorthand "| (babel) in language on input line 18. Package babel Info: Redefining german shorthand "~ (babel) in language on input line 18. Package babel Info: Redefining german shorthand "f (babel) in language on input line 18. Package babel Info: Redefining german shorthand "| (babel) in language on input line 18. Package babel Info: Redefining german shorthand "~ (babel) in language on input line 18. 
[6 ] [7 ] [8 ]) [9 ] [10 ] (./my_thesis.toc [11 ] [12]) \tf@toc=\write4 \openout4 = `my_thesis.toc'. [13] [14 ] \openout2 = `main/ch_introduction.aux'. (./main/ch_introduction.tex) [1 ] [2 ] \openout2 = `main/ch_group_projects.aux'. (./main/ch_group_projects.tex Chapter 1. LaTeX Font Info: Font shape `T1/futs/bx/n' in size <14.4> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 12. LaTeX Font Info: Font shape `T1/futs/bx/n' in size <12> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 17. - + File: images/ch_group_projects/mga_figure1.jpeg Graphic file (type jpg) Package pdftex.def Info: images/ch_group_projects/mga_figure1.jpeg used on inp ut line 27. (pdftex.def) Requested size: 400.23181pt x 134.13329pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [3 ] [4 <./images/ch_group_projects/mga_figure1.jpeg>] [5] - + File: images/ch_group_projects/epd_figure1.jpeg Graphic file (type jpg) Package pdftex.def Info: images/ch_group_projects/epd_figure1.jpeg used on inp ut line 72. (pdftex.def) Requested size: 215.12772pt x 174.80144pt. LaTeX Warning: Reference `L' on page 6 undefined on input line 73. LaTeX Warning: Reference `L' on page 6 undefined on input line 73. Underfull \vbox (badness 10000) has occurred while \output is active [] [6 <./images/ch_group_projects/epd_figure1.jpeg>] - + File: images/ch_group_projects/epd_motifs.png Graphic file (type png) Package pdftex.def Info: images/ch_group_projects/epd_motifs.png used on input line 124. (pdftex.def) Requested size: 346.89868pt x 173.44933pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [7] [8] Underfull \vbox (badness 10000) has occurred while \output is active [] [9 <./images/ch_group_projects/epd_motifs.png (PNG copy)>] [10] - File: images/ch_group_projects/pwmscan_flowchart.png Graphic file (type png) Package pdftex.def Info: images/ch_group_projects/pwmscan_flowchart.png used o n input line 178. 
(pdftex.def) Requested size: 279.21945pt x 370.52591pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [11 ] [12 <./images/ch_group_projects/pwmscan_flowchart.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [13] - File: images/ch_group_projects/pwmscan_figure_s1.png Graphic file (type png) Package pdftex.def Info: images/ch_group_projects/pwmscan_figure_s1.png used o n input line 222. (pdftex.def) Requested size: 269.60248pt x 153.77177pt. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 8.27998pt on input line 235. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 6.99199pt on input line 235. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 8.27998pt on input line 235. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 6.99199pt on input line 235. Underfull \vbox (badness 10000) has occurred while \output is active [] [14 <./images/ch_group_projects/pwmscan_figure_s1.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [15] [16] - + File: images/ch_group_projects/spark_figure1.pdf Graphic file (type pdf) Package pdftex.def Info: images/ch_group_projects/spark_figure1.pdf used on in put line 311. (pdftex.def) Requested size: 404.70483pt x 231.2599pt. LaTeX Warning: Reference `fig_s07' on page 17 undefined on input line 313. LaTeX Warning: Reference `fig_s07' on page 17 undefined on input line 313. Underfull \vbox (badness 10000) has occurred while \output is active [] [17 ] [18 <./images/ch_group_projects/spark_figure1.pdf>] - File: images/ch_group_projects/spark_supplemental_figure2.pdf Graphic file (typ e pdf) Package pdftex.def Info: images/ch_group_projects/spark_supplemental_figure2.pd f used on input line 326. (pdftex.def) Requested size: 462.5198pt x 202.3524pt. 
- File: images/ch_group_projects/spark_supplemental_figure4.pdf Graphic file (typ e pdf) Package pdftex.def Info: images/ch_group_projects/spark_supplemental_figure4.pd f used on input line 334. (pdftex.def) Requested size: 289.07487pt x 173.44492pt. - File: images/ch_group_projects/spark_supplemental_figure5.pdf Graphic file (typ e pdf) Package pdftex.def Info: images/ch_group_projects/spark_supplemental_figure5.pd f used on input line 342. (pdftex.def) Requested size: 289.07487pt x 173.44492pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [19 <./images/ch_group_projects/spark_supplemental_figure2.pdf>] [20 <./images /ch_group_projects/spark_supplemental_figure4.pdf> <./images/ch_group_projects/ spark_supplemental_figure5.pdf>]) [21] \openout2 = `main/ch_encode_peaks.aux'. (./main/ch_encode_peaks.tex [22 ] Chapter 2. - File: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png used on input line 26. (pdftex.def) Requested size: 520.34802pt x 173.44933pt. Overfull \hbox (102.66156pt too wide) in paragraph at lines 26--27 [] [] - File: images/ch_encode_peaks/peaklist_proportions_GM12878.png Graphic file (typ e png) Package pdftex.def Info: images/ch_encode_peaks/peaklist_proportions_GM12878.pn g used on input line 35. (pdftex.def) Requested size: 520.34802pt x 173.44933pt. 
Overfull \hbox (102.66156pt too wide) in paragraph at lines 35--36 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [23 ] [24 <./images/ch_encode_peaks/peaklist_peaknumber_GM12878.png (PNG copy)> <./im ages/ch_encode_peaks/peaklist_proportions_GM12878.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] [25] Underfull \vbox (badness 10000) has occurred while \output is active [] [26] - + File: images/ch_encode_peaks/MNase_profiles.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/MNase_profiles.png used on inp ut line 81. (pdftex.def) Requested size: 377.15814pt x 259.5691pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [27] [28 <./images/ch_encode_peaks/MNase_profiles.png>] - File: images/ch_encode_peaks/colocalization_ctcf.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/colocalization_ctcf.png used o n input line 108. (pdftex.def) Requested size: 403.20538pt x 320.54678pt. - File: images/ch_encode_peaks/CTCF_ndr_length_rad212.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/CTCF_ndr_length_rad212.png use d on input line 116. (pdftex.def) Requested size: 404.70483pt x 231.2599pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [29] [30 <./images/ch_encode_peaks/colocalization_ctcf.png>] [31 <./images/ch_encode _peaks/CTCF_ndr_length_rad212.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] [32] - + File: images/ch_encode_peaks/TF_associations.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/TF_associations.png used on in put line 145. (pdftex.def) Requested size: 240.13863pt x 152.59023pt. - File: images/ch_encode_peaks/ctcf_motif_association.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ctcf_motif_association.png use d on input line 153. 
(pdftex.def) Requested size: 433.61232pt x 339.18118pt. Overfull \hbox (15.92586pt too wide) in paragraph at lines 153--154 [] [] LaTeX Warning: Float too large for page by 31.83305pt on input line 209. [33 <./images/ch_encode_peaks/TF_associations.png>] [34 <./images/ch_encode_pea ks/ctcf_motif_association.png>] [35] Underfull \vbox (badness 10000) has occurred while \output is active [] [36] Underfull \vbox (badness 10000) has occurred while \output is active [] [37] - + File: images/ch_encode_peaks/ebf1_haib_1.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_1.png used on input line 234. (pdftex.def) Requested size: 260.16739pt x 346.88986pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [38] [39 <./images/ch_encode_peaks/ebf1_haib_1.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] [40] LaTeX Warning: Reference `https://ccg.epfl.ch/mga/hg19/phastcons/phastcons.html ' on page 41 undefined on input line 280. Underfull \vbox (badness 10000) has occurred while \output is active [] [41] Underfull \vbox (badness 10000) has occurred while \output is active [] [42] Underfull \vbox (badness 10000) has occurred while \output is active [] [43] LaTeX Font Info: Font shape `T1/futs/bx/n' in size <8> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 411. LaTeX Font Info: Try loading font information for T1+lmtt on input line 411. (/usr/share/texmf/tex/latex/lm/t1lmtt.fd File: t1lmtt.fd 2009/10/30 v1.6 Font defs for Latin Modern ) Package microtype Info: Loading generic protrusion settings for font family (microtype) `lmtt' (encoding: T1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. 
Underfull \vbox (badness 10000) has occurred while \output is active [] [44] Underfull \vbox (badness 10000) has occurred while \output is active [] [45] Overfull \hbox (9.9085pt too wide) in paragraph at lines 434--435 \T1/futs/m/n/10.95 (-20) ences were the cor-rected EBF1 peaks (wgEn-codeAwgTf-b -sHaibGm12878Ebf1sc137065Pcr1xUniPk [] Underfull \vbox (badness 10000) has occurred while \output is active [] [46]) Underfull \vbox (badness 10000) has occurred while \output is active [] [47] [48] \openout2 = `main/ch_smile-seq.aux'. (./main/ch_smile-seq.tex Chapter 3. - + File: images/ch_smile-seq/figure1.jpg Graphic file (type jpg) Package pdftex.def Info: images/ch_smile-seq/figure1.jpg used on input line 23 . (pdftex.def) Requested size: 232.36755pt x 301.62613pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [49 ] [50 <./images/ch_smile-seq/figure1.jpg>] - + File: images/ch_smile-seq/figure_hmm.png Graphic file (type png) Package pdftex.def Info: images/ch_smile-seq/figure_hmm.png used on input line 41. (pdftex.def) Requested size: 416.22516pt x 215.09944pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [51 <./images/ch_smile-seq/figure_hmm.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [52] Underfull \vbox (badness 10000) has occurred while \output is active [] [53] - + File: images/ch_smile-seq/figure2b_3a.png Graphic file (type png) Package pdftex.def Info: images/ch_smile-seq/figure2b_3a.png used on input lin e 119. (pdftex.def) Requested size: 398.92334pt x 166.8203pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [54] [55 <./images/ch_smile-seq/figure2b_3a.png>]) [56] \openout2 = `main/ch_atac-seq.aux'. (./main/ch_atac-seq.tex Chapter 4. Package hyperref Info: bookmark level for unknown toc defaults to 0 on input li ne 5. 
- + File: images/ch_atac-seq/ATAC-seq2.png Graphic file (type png) -Package pdftex.def Info: images/ch_atac-seq/ATAC-seq2.png used on input line 1 -8. +Package pdftex.def Info: images/ch_atac-seq/ATAC-seq2.png used on input line 2 +0. (pdftex.def) Requested size: 234.49948pt x 321.44873pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [57 ] [58 <./images/ch_atac-seq/ATAC-seq2.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [59] - -File: images/ch_atac-seq/pipeline.png Graphic file (type png) - -Package pdftex.def Info: images/ch_atac-seq/pipeline.png used on input line 57 -. -(pdftex.def) Requested size: 276.28151pt x 214.92744pt. - Underfull \vbox (badness 10000) has occurred while \output is active [] [60] -[61 <./images/ch_atac-seq/pipeline.png>] -Underfull \hbox (badness 10000) in paragraph at lines 75--76 +Underfull \hbox (badness 10000) in paragraph at lines 81--82 \T1/futs/m/n/10.95 (-12) com / 10x . ^^\les / samples / cell-[]atac / 1 . 1 . 0 / atac _ v1 _ pbmc _ 5k / atac _ v1 _ pbmc _ 5k _ possorted _ bam . [] - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [62] - + File: images/ch_atac-seq/em.png Graphic file (type png) -Package pdftex.def Info: images/ch_atac-seq/em.png used on input line 87. +Package pdftex.def Info: images/ch_atac-seq/em.png used on input line 105. (pdftex.def) Requested size: 295.41382pt x 193.37625pt. 
-[63 <./images/ch_atac-seq/em.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [64] + [61] +[62 <./images/ch_atac-seq/em.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [65] + [63] Underfull \vbox (badness 10000) has occurred while \output is active [] - [66] + [64] Underfull \vbox (badness 10000) has occurred while \output is active [] - [67] - + [65] + File: images/ch_atac-seq/fragment_lengths.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/fragment_lengths.png used on input - line 250. + line 257. (pdftex.def) Requested size: 433.62335pt x 130.087pt. -Overfull \hbox (15.93689pt too wide) in paragraph at lines 250--251 +Overfull \hbox (15.93689pt too wide) in paragraph at lines 257--258 [] [] - File: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png - used on input line 259. + used on input line 266. (pdftex.def) Requested size: 346.88986pt x 260.16739pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [68 <./images/ch_atac-seq/fragment_lengths.png (PNG copy)>] [69 <./images/ch_a -tac-seq/ctcf_motifs_10e-6_aggregations.png (PNG copy)>] -] [68 <./images/ch_at +ac-seq/ctcf_motifs_10e-6_aggregations.png (PNG copy)>] + File: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png Graphic file (type png ) Package pdftex.def Info: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png us -ed on input line 279. +ed on input line 286. (pdftex.def) Requested size: 390.26102pt x 195.1305pt. 
- [70 <./images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png (PNG copy)>] + [69 <./images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [71] - File: images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png Graphic fi le (type png) Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_auc -_roc.png used on input line 318. +_roc.png used on input line 325. (pdftex.def) Requested size: 346.88986pt x 173.44492pt. - + File: images/ch_atac-seq/sp1_motifs_7class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_7class.png used on inpu -t line 326. +t line 333. (pdftex.def) Requested size: 455.30783pt x 303.53854pt. -Overfull \hbox (37.62137pt too wide) in paragraph at lines 326--327 +Overfull \hbox (37.62137pt too wide) in paragraph at lines 333--334 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [72] -Underfull \vbox (badness 5403) has occurred while \output is active [] - - [73 <./images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png (PNG cop -y)>] -[74 <./images/ch_atac-seq/sp1_motifs_7class.png (PNG copy)>] -] [73 <./images/ch_atac-seq/sp1_motifs_7class.png (PNG copy)>] + File: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png - used on input line 349. + used on input line 356. (pdftex.def) Requested size: 390.26102pt x 173.44933pt. - File: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png Graphic file (type p ng) Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png -used on input line 357. +used on input line 364. (pdftex.def) Requested size: 455.30783pt x 202.35902pt. 
-Overfull \hbox (37.62137pt too wide) in paragraph at lines 357--358 +Overfull \hbox (37.62137pt too wide) in paragraph at lines 364--365 [] [] - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [75] -[76 <./images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png (PNG copy)> <./im -ages/ch_atac-seq/ctcf_motifs_6class_shift_flip.png (PNG copy)>] -] +[75 <./images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png (PNG copy)>] + File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png Graphic fil e (type png) Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas -s_2.png used on input line 379. +s_2.png used on input line 386. (pdftex.def) Requested size: 390.26102pt x 260.17401pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [77] -[78 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png (PNG copy) + [76] +[77 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png (PNG copy) >] -LaTeX Warning: Reference `berest_quantification_2018' on page 79 undefined on i -nput line 395. +LaTeX Warning: Reference `berest_quantification_2018' on page 78 undefined on i +nput line 402. - + File: images/ch_atac-seq/data_classCTCF_8class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/data_classCTCF_8class.png used on -input line 404. +input line 411. (pdftex.def) Requested size: 433.62335pt x 216.81166pt. 
-Overfull \hbox (15.93689pt too wide) in paragraph at lines 404--405 +Overfull \hbox (15.93689pt too wide) in paragraph at lines 411--412 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [79] -[80 <./images/ch_atac-seq/data_classCTCF_8class.png (PNG copy)>] + [78] Underfull \vbox (badness 10000) has occurred while \output is active [] - [81] -LaTeX Font Info: Try loading font information for TS1+futs on input line 444 + [79 <./images/ch_atac-seq/data_classCTCF_8class.png (PNG copy)>] +Underfull \vbox (badness 10000) has occurred while \output is active [] + + [80] +LaTeX Font Info: Try loading font information for TS1+futs on input line 451 . (/usr/share/texlive/texmf-dist/tex/latex/fourier/ts1futs.fd File: ts1futs.fd 2004/03/26 Fontinst v1.926 font definitions for TS1/futs. ) Package microtype Info: Loading generic protrusion settings for font family (microtype) `futs' (encoding: TS1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. - [82]) + [81] +Underfull \hbox (badness 3343) in paragraph at lines 493--493 +\T1/futs/m/n/10.95 (+20) FOSL2, JUNB, JUN::JUNB, FOSL1::JUND, FOS::JUN, + [] + + +Overfull \hbox (6.68097pt too wide) in paragraph at lines 485--516 + [] + [] + + +Underfull \vbox (badness 10000) has occurred while \output is active [] + + [82] +[83]) Underfull \vbox (badness 10000) has occurred while \output is active [] - [83] + [84] Overfull \vbox (45.20699pt too high) has occurred while \output is active [] -[84] +[85] [86 + + +] \openout2 = `tail/appendix.aux'. (./tail/appendix.tex Appendix A. - File: images/ch_smile-seq/figure_s4_reproduced.png Graphic file (type png) Package pdftex.def Info: images/ch_smile-seq/figure_s4_reproduced.png used on input line 13. (pdftex.def) Requested size: 424.06316pt x 235.07848pt. 
Overfull \hbox (6.3767pt too wide) in paragraph at lines 13--14 [] [] +s_EM_4class_15shift_flip.png, id=1603, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_al lpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUni Pk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input line 31. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. Underfull \vbox (badness 5652) has occurred while \output is active [] - [85 - - + [87 <./images/ch_smile-seq/figure_s4_reproduced.png>] +_allpeaks_EM_4class_15shift_flip.png, id=1609, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_GM 12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1I ggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input lin e 39. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [86 <./images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_ + [88 <./images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_ allpeaks_EM_4class_15shift_flip.png (PNG copy)>] +aks_EM_4class_15shift_flip.png, id=1614, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM12878_ allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosU niPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input line 47. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] - [87 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_ + [89 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_ GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] +allpeaks_EM_4class_15shift_flip.png, id=1619, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_GM1 2878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIg gmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input line 55. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [88 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM1287 + [90 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM1287 8_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] +12878_allpeaks_EM_4class_15shift_flip.png, id=1624, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_MNa se_GM12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1 a300IggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on inpu t line 63. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [89 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_G + [91 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_G M12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] - + File: images/ch_encode_peaks/ctcf_ndr.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ctcf_ndr.png used on input lin e 71. (pdftex.def) Requested size: 346.89647pt x 462.52863pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] - [90 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_M + [92 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_M Nase_GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] - File: images/ch_encode_peaks/jund_motif_association.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/jund_motif_association.png use d on input line 79. (pdftex.def) Requested size: 433.61232pt x 339.18118pt. Overfull \hbox (15.92586pt too wide) in paragraph at lines 79--80 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [91 <./images/ch_encode_peaks/ctcf_ndr.png (PNG copy)>] - + [93 <./images/ch_encode_peaks/ctcf_ndr.png (PNG copy)>] + File: images/ch_encode_peaks/ebf1_haib_3.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_3.png used on input line 87. (pdftex.def) Requested size: 260.16739pt x 115.62994pt. - + File: images/ch_encode_peaks/MA0154_3.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/MA0154_3.png used on input lin e 95. (pdftex.def) Requested size: 361.3491pt x 180.67456pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [92 <./images/ch_encode_peaks/jund_motif_association.png>] - + [94 <./images/ch_encode_peaks/jund_motif_association.png>] + File: images/ch_encode_peaks/ebf1_haib_2.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_2.png used on input line 103. (pdftex.def) Requested size: 260.16739pt x 346.88986pt. - File: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png used on input line 113. (pdftex.def) Requested size: 390.26102pt x 173.44933pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] - [93 <./images/ch_encode_peaks/ebf1_haib_3.png (PNG copy)> <./images/ch_encode_ -peaks/MA0154_3.png>] [94 <./images/ch_encode_peaks/ebf1_haib_2.png (PNG copy)>] - <./images/ch_encode_ +peaks/MA0154_3.png>] [96 <./images/ch_encode_peaks/ebf1_haib_2.png (PNG copy)>] + File: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png Graphic file (type pn g) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png u sed on input line 121. (pdftex.def) Requested size: 390.26102pt x 173.44933pt. - File: images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png Graphi c file (type png) Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_bes t_motifs.png used on input line 129. (pdftex.def) Requested size: 202.3524pt x 231.2599pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [95 <./images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png (PNG copy)> <./im + [97 <./images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png (PNG copy)> <./im ages/ch_atac-seq/sp1_motifs_6class_shift_flip.png (PNG copy)>] - + File: images/ch_atac-seq/sp1_motifs_10class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_10class.png used on inp ut line 137. (pdftex.def) Requested size: 455.30783pt x 303.53854pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 137--138 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [96 <./images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png (PNG + [98 <./images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png (PNG copy)>] File: images/ch_atac-seq/sp1_motifs_10class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_10class.png used on inp ut line 145. (pdftex.def) Requested size: 455.30783pt x 303.53854pt. 
Overfull \hbox (37.62137pt too wide) in paragraph at lines 145--146 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [97 <./images/ch_atac-seq/sp1_motifs_10class.png (PNG copy)>] -] + File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas s.png used on input line 153. (pdftex.def) Requested size: 390.26102pt x 260.17401pt. - + File: images/ch_atac-seq/data_classPU1_2class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/data_classPU1_2class.png used on i nput line 161. (pdftex.def) Requested size: 433.62335pt x 216.81166pt. Overfull \hbox (15.93689pt too wide) in paragraph at lines 161--162 [] [] - + File: images/ch_atac-seq/data_classjun_3class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/data_classjun_3class.png used on i nput line 169. (pdftex.def) Requested size: 433.62335pt x 216.81166pt. Overfull \hbox (15.93689pt too wide) in paragraph at lines 169--170 [] [] -) [98] [99 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png (PNG -copy)>] [100 <./images/ch_atac-seq/data_classPU1_2class.png (PNG copy)>] -[101 <./images/ch_atac-seq/data_classjun_3class.png (PNG copy)>] [102 +) [100] [101 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png (PN +G copy)>] [102 <./images/ch_atac-seq/data_classPU1_2class.png (PNG copy)>] +[103 <./images/ch_atac-seq/data_classjun_3class.png (PNG copy)>] [104 ] \openout2 = `tail/biblio.aux'. -(./tail/biblio.tex (./my_thesis.bbl [103 +(./tail/biblio.tex (./my_thesis.bbl [105 -] [104] [105] [106] [107] [108] -[109] [110])) [111] +] [106] [107] [108] [109] [110] +[111] [112])) [113] \openout2 = `tail/cv.aux'. - (./tail/cv.tex [112 + (./tail/cv.tex [114 ] - + File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf used on input line 6. (pdftex.def) Requested size: 597.22978pt x 845.15544pt. 
File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf used on input line 6. (pdftex.def) Requested size: 597.22978pt x 845.15544pt. - + File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.22978pt x 845.15544pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. - [113 <./tail/cv.pdf>] - + [115 <./tail/cv.pdf>] + File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page2 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page2 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page2 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. - [114 <./tail/cv.pdf>]) + [116 <./tail/cv.pdf>]) Package atveryend Info: Empty hook `BeforeClearDocument' on input line 82. Package atveryend Info: Empty hook `AfterLastShipout' on input line 82. 
(./my_thesis.aux (./head/dedication.aux) (./head/acknowledgements.aux) (./head/preface.aux) (./head/abstracts.aux) (./main/ch_introduction.aux) (./main/ch_group_projects.aux) (./main/ch_encode_peaks.aux) (./main/ch_smile-seq.aux) (./main/ch_atac-seq.aux) (./tail/appendix.aux) (./tail/biblio.aux) (./tail/cv.aux)) Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 82. Package atveryend Info: Empty hook `AtEndAfterFileList' on input line 82. LaTeX Warning: There were undefined references. LaTeX Warning: There were multiply-defined labels. Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 82. ) Here is how much of TeX's memory you used: - 27990 strings out of 492982 - 497594 string characters out of 6134895 - 762147 words of memory out of 5000000 - 30090 multiletter control sequences out of 15000+600000 + 27979 strings out of 492982 + 497327 string characters out of 6134895 + 761740 words of memory out of 5000000 + 30081 multiletter control sequences out of 15000+600000 146869 words of font info for 340 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 57i,24n,79p,2621b,1323s stack positions out of 5000i,500n,10000p,200000b,80000s {/usr/share/texmf/fonts/enc/dvips/lm/lm-ec.enc}{/usr/share/texlive/texmf-dist /fonts/enc/dvips/base/8r.enc} -Output written on my_thesis.pdf (132 pages, 84877008 bytes). +Output written on my_thesis.pdf (134 pages, 84847998 bytes). PDF statistics: - 2184 PDF objects out of 2487 (max. 8388607) - 1884 compressed objects within 19 object streams - 484 named destinations out of 1000 (max. 500000) - 33741 words of extra memory for PDF output out of 35830 (max. 10000000) + 2171 PDF objects out of 2487 (max. 8388607) + 1870 compressed objects within 19 object streams + 483 named destinations out of 1000 (max. 500000) + 33720 words of extra memory for PDF output out of 35830 (max. 
10000000) diff --git a/my_thesis.pdf b/my_thesis.pdf index ca8fb80..9390d98 100644 Binary files a/my_thesis.pdf and b/my_thesis.pdf differ diff --git a/my_thesis.synctex.gz b/my_thesis.synctex.gz index c8adb70..eac9120 100644 Binary files a/my_thesis.synctex.gz and b/my_thesis.synctex.gz differ diff --git a/my_thesis.toc b/my_thesis.toc index b76bf6a..432470d 100644 --- a/my_thesis.toc +++ b/my_thesis.toc @@ -1,97 +1,95 @@ \babel@toc {english}{} \babel@toc {french}{} \babel@toc {english}{} \contentsline {chapter}{Acknowledgements}{i}{chapter*.1} \contentsline {chapter}{Preface}{iii}{chapter*.2} \contentsline {chapter}{Abstract (English/Fran\IeC {\c c}ais/Deutsch)}{v}{chapter*.3} \babel@toc {german}{} \babel@toc {english}{} \babel@toc {french}{} \babel@toc {english}{} \contentsline {chapter}{Introduction}{1}{chapter*.7} \contentsline {chapter}{\numberline {1}Published laboratory projects}{3}{chapter.1} \contentsline {chapter}{Published laboratory projects}{3}{chapter.1} \contentsline {section}{\numberline {1.1}Mass Genome Annotation repository}{3}{section.1.1} \contentsline {subsection}{\numberline {1.1.1}Introduction}{3}{subsection.1.1.1} \contentsline {subsection}{\numberline {1.1.2}MGA content and organization}{3}{subsection.1.1.2} \contentsline {subsection}{\numberline {1.1.3}Conclusions}{5}{subsection.1.1.3} \contentsline {section}{\numberline {1.2}Eukaryotic Promoter Database}{6}{section.1.2} \contentsline {subsection}{\numberline {1.2.1}Introduction}{7}{subsection.1.2.1} \contentsline {subsection}{\numberline {1.2.2}EPDnew now annotates (some of) your mushrooms and vegetables}{7}{subsection.1.2.2} \contentsline {subsection}{\numberline {1.2.3}Increased mapping precision in human}{7}{subsection.1.2.3} \contentsline {subsection}{\numberline {1.2.4}Integration of EPDnew with other resources}{9}{subsection.1.2.4} \contentsline {subsection}{\numberline {1.2.5}Conclusions}{10}{subsection.1.2.5} \contentsline {subsection}{\numberline 
{1.2.6}Methods}{10}{subsection.1.2.6} \contentsline {subsubsection}{Motif occurrence profiles}{10}{subsection.1.2.6} \contentsline {section}{\numberline {1.3}PWMScan}{11}{section.1.3} \contentsline {subsection}{\numberline {1.3.1}Introduction}{11}{subsection.1.3.1} \contentsline {subsection}{\numberline {1.3.2}Data and methods}{13}{subsection.1.3.2} \contentsline {subsection}{\numberline {1.3.3}Benchmark}{14}{subsection.1.3.3} \contentsline {subsection}{\numberline {1.3.4}Conclusions}{16}{subsection.1.3.4} \contentsline {section}{\numberline {1.4}SPar-K}{17}{section.1.4} \contentsline {subsection}{\numberline {1.4.1}Introduction}{17}{subsection.1.4.1} \contentsline {subsection}{\numberline {1.4.2}Methods}{17}{subsection.1.4.2} \contentsline {subsection}{\numberline {1.4.3}Results}{21}{subsection.1.4.3} \contentsline {subsection}{\numberline {1.4.4}Conclusion}{21}{subsection.1.4.4} \contentsline {chapter}{\numberline {2}ENCODE peaks analysis}{23}{chapter.2} \contentsline {chapter}{ENCODE peaks analysis}{23}{chapter.2} \contentsline {section}{\numberline {2.1}Data}{23}{section.2.1} \contentsline {section}{\numberline {2.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{25}{section.2.2} \contentsline {subsection}{\numberline {2.2.1}Data realignment}{26}{subsection.2.2.1} \contentsline {section}{\numberline {2.3}Nucleosome organization around transcription factor binding sites}{27}{section.2.3} \contentsline {section}{\numberline {2.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{29}{section.2.4} \contentsline {section}{\numberline {2.5}CTCF and JunD interactomes}{33}{section.2.5} \contentsline {section}{\numberline {2.6}EBF1 binds nucleosomes}{38}{section.2.6} \contentsline {section}{\numberline {2.7}Methods}{40}{section.2.7} \contentsline {subsection}{\numberline {2.7.1}Data and data processing}{40}{subsection.2.7.1} \contentsline {subsection}{\numberline {2.7.2}Classification of MNase patterns}{41}{subsection.2.7.2} \contentsline 
{subsection}{\numberline {2.7.3}Quantifying nucleosome array intensity from classification results}{42}{subsection.2.7.3} \contentsline {subsection}{\numberline {2.7.4}Peak colocalization}{43}{subsection.2.7.4} \contentsline {subsection}{\numberline {2.7.5}NDR detection}{43}{subsection.2.7.5} \contentsline {subsection}{\numberline {2.7.6}CTCF and JunD interactors}{46}{subsection.2.7.6} \contentsline {subsection}{\numberline {2.7.7}EBF1 and nucleosome}{47}{subsection.2.7.7} \contentsline {chapter}{\numberline {3}SMiLE-seq data analysis}{49}{chapter.3} \contentsline {chapter}{SMiLE-seq data analysis}{49}{chapter.3} \contentsline {subsection}{\numberline {3.0.1}Introduction}{49}{subsection.3.0.1} \contentsline {subsection}{\numberline {3.0.2}Hidden Markov Model Motif discovery}{51}{subsection.3.0.2} \contentsline {subsection}{\numberline {3.0.3}Binding motif evaluation}{52}{subsection.3.0.3} \contentsline {subsection}{\numberline {3.0.4}Results}{54}{subsection.3.0.4} \contentsline {subsection}{\numberline {3.0.5}Conclusions}{56}{subsection.3.0.5} \contentsline {chapter}{\numberline {4}Chromatin accessibility of monocytes}{57}{chapter.4} \contentsline {section}{\numberline {4.1}ATAC-seq}{57}{section.4.1} \contentsline {section}{\numberline {4.2}Monitoring TF binding}{59}{section.4.2} \contentsline {section}{\numberline {4.3}The advent of single cell DGF}{60}{section.4.3} -\contentsline {section}{\numberline {4.4}A quick overview of scATAC-seq data analysis}{60}{section.4.4} -\contentsline {section}{\numberline {4.5}Open questions}{60}{section.4.5} -\contentsline {section}{\numberline {4.6}Data}{62}{section.4.6} -\contentsline {section}{\numberline {4.7}Identification of catalog of chromatin architectures}{62}{section.4.7} -\contentsline {subsection}{\numberline {4.7.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{63}{subsection.4.7.1} -\contentsline {subsection}{\numberline {4.7.2}EMSequence : an algorithm to identify over-represented 
sequences}{64}{subsection.4.7.2} -\contentsline {subsubsection}{without shift and flip}{64}{subsection.4.7.2} -\contentsline {subsubsection}{with shift and flip}{65}{equation.4.7.2} -\contentsline {subsection}{\numberline {4.7.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{66}{subsection.4.7.3} -\contentsline {subsection}{\numberline {4.7.4}Data realignment}{67}{subsection.4.7.4} -\contentsline {section}{\numberline {4.8}Results}{68}{section.4.8} -\contentsline {subsection}{\numberline {4.8.1}Fragment size analysis}{68}{subsection.4.8.1} -\contentsline {subsection}{\numberline {4.8.2}Measuring open chromatin and nucleosome occupancy}{70}{subsection.4.8.2} -\contentsline {subsection}{\numberline {4.8.3}Evaluation of EMSequence and ChIPPartitioning}{72}{subsection.4.8.3} -\contentsline {subsubsection}{EMSequence}{72}{subsection.4.8.3} -\contentsline {subsubsection}{ChIPPartitioning}{75}{figure.caption.38} -\contentsline {section}{\numberline {4.9}Aligning the binding sites}{77}{section.4.9} -\contentsline {section}{\numberline {4.10}Exploring individual TF classes}{79}{section.4.10} -\contentsline {section}{\numberline {4.11}Discussions}{81}{section.4.11} -\contentsline {section}{\numberline {4.12}Perspectives}{81}{section.4.12} -\contentsline {section}{\numberline {4.13}Methods}{82}{section.4.13} -\contentsline {subsection}{\numberline {4.13.1}Implementations}{82}{subsection.4.13.1} -\contentsline {subsection}{\numberline {4.13.2}Fragment classes}{82}{subsection.4.13.2} -\contentsline {subsection}{\numberline {4.13.3}Simulated sequences}{83}{subsection.4.13.3} -\contentsline {subsection}{\numberline {4.13.4}Realignment using JASPAR motifs}{83}{subsection.4.13.4} -\contentsline {subsection}{\numberline {4.13.5}Display of motif logo}{83}{subsection.4.13.5} -\contentsline {subsection}{\numberline {4.13.6}Model extension}{83}{subsection.4.13.6} -\contentsline {subsection}{\numberline {4.13.7}Extracting data assigned to a 
class}{83}{subsection.4.13.7} -\contentsline {chapter}{\numberline {A}An appendix}{85}{appendix.A} -\contentsline {section}{\numberline {A.1}Supplementary figures}{85}{section.A.1} +\contentsline {section}{\numberline {4.4}Open issues}{60}{section.4.4} +\contentsline {section}{\numberline {4.5}Data}{60}{section.4.5} +\contentsline {section}{\numberline {4.6}Identifying over-represented signals}{61}{section.4.6} +\contentsline {subsection}{\numberline {4.6.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{61}{subsection.4.6.1} +\contentsline {subsection}{\numberline {4.6.2}EMSequence : an algorithm to identify over-represented sequences}{61}{subsection.4.6.2} +\contentsline {subsubsection}{without shift and flip}{63}{figure.caption.32} +\contentsline {subsubsection}{with shift and flip}{63}{equation.4.6.2} +\contentsline {subsection}{\numberline {4.6.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{65}{subsection.4.6.3} +\contentsline {subsection}{\numberline {4.6.4}Data realignment}{66}{subsection.4.6.4} +\contentsline {section}{\numberline {4.7}Results}{66}{section.4.7} +\contentsline {subsection}{\numberline {4.7.1}Fragment size analysis}{66}{subsection.4.7.1} +\contentsline {subsection}{\numberline {4.7.2}Measuring open chromatin and nucleosome occupancy}{69}{subsection.4.7.2} +\contentsline {subsection}{\numberline {4.7.3}Evaluation of EMSequence and ChIPPartitioning}{71}{subsection.4.7.3} +\contentsline {subsubsection}{EMSequence}{71}{subsection.4.7.3} +\contentsline {subsubsection}{ChIPPartitioning}{74}{figure.caption.37} +\contentsline {section}{\numberline {4.8}Aligning the binding sites}{76}{section.4.8} +\contentsline {section}{\numberline {4.9}Exploring individual TF classes}{79}{section.4.9} +\contentsline {section}{\numberline {4.10}Discussions}{80}{section.4.10} +\contentsline {section}{\numberline {4.11}Perspectives}{80}{section.4.11} +\contentsline {section}{\numberline 
{4.12}Methods}{81}{section.4.12} +\contentsline {subsection}{\numberline {4.12.1}Implementations}{81}{subsection.4.12.1} +\contentsline {subsection}{\numberline {4.12.2}Fragment classes}{81}{subsection.4.12.2} +\contentsline {subsection}{\numberline {4.12.3}Simulated sequences}{81}{subsection.4.12.3} +\contentsline {subsection}{\numberline {4.12.4}Realignment using JASPAR motifs}{82}{subsection.4.12.4} +\contentsline {subsection}{\numberline {4.12.5}Model extension}{82}{subsection.4.12.5} +\contentsline {subsection}{\numberline {4.12.6}Extracting data assigned to a class}{82}{subsection.4.12.6} \vspace {\normalbaselineskip } -\contentsline {chapter}{Bibliography}{103}{section*.62} -\contentsline {chapter}{Bibliography}{111}{appendix*.63} -\contentsline {chapter}{Curriculum Vitae}{113}{section*.64} +\contentsline {chapter}{\numberline {A}An appendix}{87}{appendix.A} +\contentsline {section}{\numberline {A.1}Supplementary figures}{87}{section.A.1} +\contentsline {chapter}{Bibliography}{105}{section*.62} +\contentsline {chapter}{Bibliography}{113}{appendix*.63} +\contentsline {chapter}{Curriculum Vitae}{115}{section*.64} diff --git a/tail/appendix.aux b/tail/appendix.aux index de872b2..f64b308 100644 --- a/tail/appendix.aux +++ b/tail/appendix.aux @@ -1,97 +1,97 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{jolma_dna-binding_2013} \citation{jolma_dna-binding_2013} -\@writefile{toc}{\contentsline {chapter}{\numberline {A}An appendix}{85}{appendix.A}} +\@writefile{toc}{\contentsline {chapter}{\numberline {A}An appendix}{87}{appendix.A}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {A.1}Supplementary figures}{85}{section.A.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.1}{\ignorespaces \textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM 
discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }}{85}{figure.caption.43}} -\newlabel{suppl_smileseq_auc_2}{{A.1}{85}{\textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }{figure.caption.43}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.2}{\ignorespaces \textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. 
The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{86}{figure.caption.44}} -\newlabel{suppl_encode_peaks_em_ctcf}{{A.2}{86}{\textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.44}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.3}{\ignorespaces \textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. 
DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{87}{figure.caption.45}} -\newlabel{suppl_encode_peaks_em_nrf1}{{A.3}{87}{\textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.45}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.4}{\ignorespaces \textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. 
The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{88}{figure.caption.46}} -\newlabel{suppl_encode_peaks_em_cfos}{{A.4}{88}{\textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.46}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.5}{\ignorespaces \textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. 
Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{89}{figure.caption.47}} -\newlabel{suppl_encode_peaks_em_max}{{A.5}{89}{\textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.47}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.6}{\ignorespaces \textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. 
The y-axis indicates the min/max signal for all densities.\relax }}{90}{figure.caption.48}} -\newlabel{suppl_encode_peaks_em_brca1}{{A.6}{90}{\textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.48}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.7}{\ignorespaces \textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }}{91}{figure.caption.49}} -\newlabel{suppl_encode_peaks_ctcf_ndr}{{A.7}{91}{\textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }{figure.caption.49}{}} +\@writefile{toc}{\contentsline {section}{\numberline {A.1}Supplementary figures}{87}{section.A.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.1}{\ignorespaces \textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. 
AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }}{87}{figure.caption.43}} +\newlabel{suppl_smileseq_auc_2}{{A.1}{87}{\textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }{figure.caption.43}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.2}{\ignorespaces \textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. 
DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{88}{figure.caption.44}} +\newlabel{suppl_encode_peaks_em_ctcf}{{A.2}{88}{\textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.44}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.3}{\ignorespaces \textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. 
The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{89}{figure.caption.45}} +\newlabel{suppl_encode_peaks_em_nrf1}{{A.3}{89}{\textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.45}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.4}{\ignorespaces \textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. 
Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{90}{figure.caption.46}} +\newlabel{suppl_encode_peaks_em_cfos}{{A.4}{90}{\textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.46}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.5}{\ignorespaces \textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. 
The y-axis indicates the min/max signal for all densities.\relax }}{91}{figure.caption.47}} +\newlabel{suppl_encode_peaks_em_max}{{A.5}{91}{\textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.47}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.6}{\ignorespaces \textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. 
The y-axis indicates the min/max signal for all densities.\relax }}{92}{figure.caption.48}} +\newlabel{suppl_encode_peaks_em_brca1}{{A.6}{92}{\textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.48}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.7}{\ignorespaces \textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }}{93}{figure.caption.49}} +\newlabel{suppl_encode_peaks_ctcf_ndr}{{A.7}{93}{\textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }{figure.caption.49}{}} \citation{khan_jaspar_2018} \citation{khan_jaspar_2018} -\@writefile{lof}{\contentsline {figure}{\numberline {A.8}{\ignorespaces \textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. 
\textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }}{92}{figure.caption.50}} -\newlabel{suppl_encode_peaks_jund_association}{{A.8}{92}{\textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). 
The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }{figure.caption.50}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.9}{\ignorespaces \textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{93}{figure.caption.51}} -\newlabel{suppl_encode_peaks_ebf1_nucl}{{A.9}{93}{\textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.51}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.10}{\ignorespaces \textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }}{93}{figure.caption.52}} -\newlabel{suppl_encode_peaks_ebf1_logo}{{A.10}{93}{\textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }{figure.caption.52}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.11}{\ignorespaces \textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }}{94}{figure.caption.53}} -\newlabel{suppl_encode_peaks_ebf1_chrom}{{A.11}{94}{\textbf {EBF1 binding sites} chromatin features. 
\textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }{figure.caption.53}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.8}{\ignorespaces \textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }}{94}{figure.caption.50}} +\newlabel{suppl_encode_peaks_jund_association}{{A.8}{94}{\textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. 
For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }{figure.caption.50}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.9}{\ignorespaces \textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{95}{figure.caption.51}} +\newlabel{suppl_encode_peaks_ebf1_nucl}{{A.9}{95}{\textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). 
The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.51}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.10}{\ignorespaces \textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }}{95}{figure.caption.52}} +\newlabel{suppl_encode_peaks_ebf1_logo}{{A.10}{95}{\textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }{figure.caption.52}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.11}{\ignorespaces \textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }}{96}{figure.caption.53}} +\newlabel{suppl_encode_peaks_ebf1_chrom}{{A.11}{96}{\textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }{figure.caption.53}{}} \citation{ou_motifstack_2018} \citation{ou_motifstack_2018} -\@writefile{lof}{\contentsline {figure}{\numberline {A.12}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. 
The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{95}{figure.caption.54}} -\newlabel{suppl_emread_sp1_noshift_flip}{{A.12}{95}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.54}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.13}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{95}{figure.caption.55}} -\newlabel{suppl_emread_sp1_shift_flip}{{A.13}{95}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. 
The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.55}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.14}{\ignorespaces \textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }}{96}{figure.caption.56}} -\newlabel{suppl_atac_seq_emseq_best_motifs}{{A.14}{96}{\textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }{figure.caption.56}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.15}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. 
Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{97}{figure.caption.57}} -\newlabel{suppl_emseq_sp1_10class}{{A.15}{97}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.57}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.16}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{98}{figure.caption.58}} -\newlabel{suppl_emseq_sp1_10class}{{A.16}{98}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.58}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.17}{\ignorespaces \textbf {Extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. 
The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{99}{figure.caption.59}} -\newlabel{suppl_atac_seq_23class}{{A.17}{99}{\textbf {Extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.59}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.18}{\ignorespaces \textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{100}{figure.caption.60}} -\newlabel{suppl_atac_seq_pu1_subclass}{{A.18}{100}{\textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. 
A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.60}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.19}{\ignorespaces \textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{101}{figure.caption.61}} -\newlabel{suppl_atac_seq_ap1_subclass}{{A.19}{101}{\textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.61}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.12}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. 
For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{97}{figure.caption.54}} +\newlabel{suppl_emread_sp1_noshift_flip}{{A.12}{97}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.54}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.13}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{97}{figure.caption.55}} +\newlabel{suppl_emread_sp1_shift_flip}{{A.13}{97}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. 
For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.55}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.14}{\ignorespaces \textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }}{98}{figure.caption.56}} +\newlabel{suppl_atac_seq_emseq_best_motifs}{{A.14}{98}{\textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }{figure.caption.56}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.15}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{99}{figure.caption.57}} +\newlabel{suppl_emseq_sp1_10class}{{A.15}{99}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. 
These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.57}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.16}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{100}{figure.caption.58}} +\newlabel{suppl_emseq_sp1_10class}{{A.16}{100}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.58}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.17}{\ignorespaces \textbf {Extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. 
A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{101}{figure.caption.59}} +\newlabel{suppl_atac_seq_23class}{{A.17}{101}{\textbf {Extended sequence and chromatin models} found in 70'462 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.59}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.18}{\ignorespaces \textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{102}{figure.caption.60}} +\newlabel{suppl_atac_seq_pu1_subclass}{{A.18}{102}{\textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.60}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.19}{\ignorespaces \textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. 
The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{103}{figure.caption.61}} +\newlabel{suppl_atac_seq_ap1_subclass}{{A.19}{103}{\textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.61}{}} \@setckpt{tail/appendix}{ -\setcounter{page}{102} +\setcounter{page}{104} \setcounter{equation}{0} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{1} \setcounter{section}{1} \setcounter{subsection}{0} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{19} \setcounter{table}{0} \setcounter{NAT@ctr}{0} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{39} \setcounter{algocfline}{3} \setcounter{algocfproc}{3} 
\setcounter{algocf}{3} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/tail/biblio.aux b/tail/biblio.aux index 38a0ea3..51bc313 100644 --- a/tail/biblio.aux +++ b/tail/biblio.aux @@ -1,150 +1,149 @@ \relax \providecommand\hyper@newdestlabel[2]{} \bibstyle{apalike} \bibdata{tail/bibliography} \bibcite{adey_rapid_2010}{{1}{2010}{{Adey et~al.}}{{}}} \bibcite{aerts_toucan:_2003}{{2}{2003}{{Aerts et~al.}}{{}}} \bibcite{aibar_scenic:_2017}{{3}{2017}{{Aibar et~al.}}{{}}} \bibcite{alipanahi_predicting_2015}{{4}{2015}{{Alipanahi et~al.}}{{}}} \bibcite{ambrosini_chip-seq_2016}{{5}{2016a}{{Ambrosini et~al.}}{{}}} \bibcite{ambrosini_chip-seq_2016-1}{{6}{2016b}{{Ambrosini et~al.}}{{}}} \bibcite{ambrosini_pwmscan:_2018}{{7}{2018}{{Ambrosini et~al.}}{{}}} \bibcite{ambrosini_signal_2003}{{8}{2003}{{Ambrosini et~al.}}{{}}} \bibcite{angerer_single_2017}{{9}{2017}{{Angerer et~al.}}{{}}} \bibcite{bailey_znf143_2015}{{10}{2015}{{Bailey et~al.}}{{}}} \bibcite{bailey_meme_2009}{{11}{2009}{{Bailey et~al.}}{{}}} -\@writefile{toc}{\contentsline {chapter}{Bibliography}{103}{section*.62}} +\@writefile{toc}{\contentsline {chapter}{Bibliography}{105}{section*.62}} \bibcite{barrett_ncbi_2011}{{12}{2011}{{Barrett et~al.}}{{}}} \bibcite{barski_high-resolution_2007}{{13}{2007}{{Barski et~al.}}{{}}} \bibcite{beckstette_fast_2006}{{14}{2006}{{Beckstette et~al.}}{{}}} \bibcite{berest_quantification_2018}{{15}{2018}{{Berest et~al.}}{{}}} \bibcite{berger_universal_2009}{{16}{2009}{{Berger and Bulyk}}{{}}} \bibcite{boller_defining_2018}{{17}{2018}{{Boller et~al.}}{{}}} \bibcite{boller_pioneering_2016}{{18}{2016}{{Boller et~al.}}{{}}} \bibcite{boyle_high-resolution_2008}{{19}{2008}{{Boyle et~al.}}{{}}} \bibcite{bucher_compilation_1986}{{20}{1986}{{Bucher and Trifonov}}{{}}} \bibcite{buenrostro_transposition_2013}{{21}{2013}{{Buenrostro et~al.}}{{}}} \bibcite{castro-mondragon_rsat_2017}{{22}{2017}{{Castro-Mondragon et~al.}}{{}}} 
\bibcite{chatr-aryamontri_biogrid_2017}{{23}{2017}{{Chatr-aryamontri et~al.}}{{}}} \bibcite{cheng_understanding_2012}{{24}{2012}{{Cheng et~al.}}{{}}} \bibcite{cirillo_opening_2002}{{25}{2002}{{Cirillo et~al.}}{{}}} \bibcite{consortium_integrated_2012}{{26}{2012}{{Consortium}}{{}}} \bibcite{dalton_clustering_2009}{{27}{2009}{{Dalton et~al.}}{{}}} \bibcite{donohoe_identification_2007}{{28}{2007}{{Donohoe et~al.}}{{}}} \bibcite{dreos_epd_2013}{{29}{2013}{{Dreos et~al.}}{{}}} \bibcite{dreos_eukaryotic_2017}{{30}{2017}{{Dreos et~al.}}{{}}} \bibcite{dreos_mga_2018}{{31}{2018}{{Dreos et~al.}}{{}}} \bibcite{dreos_eukaryotic_2015}{{32}{2015}{{Dreos et~al.}}{{}}} \bibcite{fan_characterizing_2016}{{33}{2016}{{Fan et~al.}}{{}}} \bibcite{fu_motifviz:_2004}{{34}{2004}{{Fu et~al.}}{{}}} \bibcite{fu_insulator_2008}{{35}{2008}{{Fu et~al.}}{{}}} \bibcite{gaffney_controls_2012}{{36}{2012}{{Gaffney et~al.}}{{}}} \bibcite{gerstein_architecture_2012}{{37}{2012}{{Gerstein et~al.}}{{}}} \bibcite{ghirlando_ctcf:_2016}{{38}{2016}{{Ghirlando and Felsenfeld}}{{}}} \bibcite{gonzalez-blas_cistopic:_2019}{{39}{2019}{{Gonz\IeC {\'a}lez-Blas et~al.}}{{}}} \bibcite{grant_fimo:_2011}{{40}{2011}{{Grant et~al.}}{{}}} \bibcite{grossman_positional_2018}{{41}{2018}{{Grossman et~al.}}{{}}} \bibcite{groux_spar-k:_2019}{{42}{2019}{{Groux and Bucher}}{{}}} \bibcite{guo_high_2012}{{43}{2012}{{Guo et~al.}}{{}}} \bibcite{hagman_early_2005}{{44}{2005}{{Hagman and Lukin}}{{}}} \bibcite{heinz_simple_2010}{{45}{2010}{{Heinz et~al.}}{{}}} -\bibcite{hepler_10x_2018}{{46}{2018}{{Hepler}}{{}}} -\bibcite{hertz_identification_1990}{{47}{1990}{{Hertz et~al.}}{{}}} -\bibcite{hon_chromasig:_2008}{{48}{2008}{{Hon et~al.}}{{}}} -\bibcite{ioshikhes_variety_2011}{{49}{2011}{{Ioshikhes et~al.}}{{}}} -\bibcite{isakova_smile-seq_2017}{{50}{2017}{{Isakova et~al.}}{{}}} -\bibcite{jolma_multiplexed_2010}{{51}{2010}{{Jolma et~al.}}{{}}} -\bibcite{jolma_dna-binding_2013}{{52}{2013}{{Jolma et~al.}}{{}}} 
-\bibcite{kent_blatblast-like_2002}{{53}{2002}{{Kent}}{{}}} -\bibcite{khan_jaspar_2018}{{54}{2018}{{Khan et~al.}}{{}}} -\bibcite{kiselev_sc3:_2017}{{55}{2017}{{Kiselev et~al.}}{{}}} -\bibcite{kulakovskiy_hocomoco:_2018}{{56}{2018}{{Kulakovskiy et~al.}}{{}}} -\bibcite{kulakovskiy_hocomoco:_2016}{{57}{2016}{{Kulakovskiy et~al.}}{{}}} -\bibcite{kundaje_ubiquitous_2012}{{58}{2012}{{Kundaje et~al.}}{{}}} -\bibcite{kurotaki_transcriptional_2017}{{59}{2017}{{Kurotaki et~al.}}{{}}} -\bibcite{langmead_fast_2012}{{60}{2012}{{Langmead and Salzberg}}{{}}} -\bibcite{langmead_ultrafast_2009}{{61}{2009}{{Langmead et~al.}}{{}}} -\bibcite{li_sequence_2009}{{62}{2009}{{Li et~al.}}{{}}} -\bibcite{li_identification_2019}{{63}{2019}{{Li et~al.}}{{}}} -\bibcite{lizio_gateways_2015}{{64}{2015}{{Lizio et~al.}}{{}}} -\bibcite{losada_cohesin_2014}{{65}{2014}{{Losada}}{{}}} -\bibcite{maerkl_systems_2007}{{66}{2007}{{Maerkl and Quake}}{{}}} -\bibcite{maier_early_2004}{{67}{2004}{{Maier et~al.}}{{}}} -\bibcite{marsland_machine_2015-1}{{68}{2015}{{Marsland}}{{}}} -\bibcite{mathelier_jaspar_2014}{{69}{2014}{{Mathelier et~al.}}{{}}} -\bibcite{nair_probabilistic_2014}{{70}{2014}{{Nair et~al.}}{{}}} -\bibcite{neph_expansive_2012}{{71}{2012}{{Neph et~al.}}{{}}} -\bibcite{nielsen_catchprofiles:_2012}{{72}{2012}{{Nielsen et~al.}}{{}}} -\bibcite{ong_ctcf:_2014}{{73}{2014}{{Ong and Corces}}{{}}} -\bibcite{orenstein_comparative_2014}{{74}{2014}{{Orenstein and Shamir}}{{}}} -\bibcite{ou_motifstack_2018}{{75}{2018}{{Ou et~al.}}{{}}} -\bibcite{pizzi_fast_2008}{{76}{2008}{{Pizzi and Ukkonen}}{{}}} -\bibcite{pollard_detection_2010}{{77}{2010}{{Pollard et~al.}}{{}}} -\bibcite{quinlan_bedtools:_2010}{{78}{2010}{{Quinlan and Hall}}{{}}} -\bibcite{raney_track_2014}{{79}{2014}{{Raney et~al.}}{{}}} -\bibcite{rico_comparative_2017}{{80}{2017}{{Rico et~al.}}{{}}} -\bibcite{roadmap_epigenomics_consortium_integrative_2015}{{81}{2015}{{{Roadmap Epigenomics Consortium} et~al.}}{{}}} 
-\bibcite{rustici_arrayexpress_2013}{{82}{2013}{{Rustici et~al.}}{{}}} -\bibcite{schones_statistical_2007}{{83}{2007}{{Schones et~al.}}{{}}} -\bibcite{schutz_mamot:_2008}{{84}{2008}{{Sch\IeC {\"u}tz and Delorenzi}}{{}}} -\bibcite{siepel_evolutionarily_2005}{{85}{2005}{{Siepel et~al.}}{{}}} -\bibcite{soufi_pioneer_2015}{{86}{2015}{{Soufi et~al.}}{{}}} -\bibcite{stedman_cohesins_2008}{{87}{2008}{{Stedman et~al.}}{{}}} -\bibcite{trifonov_cracking_2011}{{88}{2011}{{Trifonov}}{{}}} -\bibcite{turatsinze_using_2008}{{89}{2008}{{Turatsinze et~al.}}{{}}} -\bibcite{vierstra_genomic_2016}{{90}{2016}{{Vierstra and Stamatoyannopoulos}}{{}}} -\bibcite{voss_dynamic_2014}{{91}{2014}{{Voss and Hager}}{{}}} -\bibcite{wang_sequence_2012}{{92}{2012}{{Wang et~al.}}{{}}} -\bibcite{weirauch_evaluation_2013}{{93}{2013}{{Weirauch et~al.}}{{}}} -\bibcite{wu_biogps:_2016}{{94}{2016}{{Wu et~al.}}{{}}} -\bibcite{zaret_pioneer_2011}{{95}{2011}{{Zaret and Carroll}}{{}}} -\bibcite{zhang_canonical_2014}{{96}{2014}{{Zhang et~al.}}{{}}} -\bibcite{zhao_tred:_2005}{{97}{2005}{{Zhao et~al.}}{{}}} -\bibcite{zhao_inferring_2009}{{98}{2009}{{Zhao et~al.}}{{}}} -\@writefile{toc}{\contentsline {chapter}{Bibliography}{111}{appendix*.63}} +\bibcite{hertz_identification_1990}{{46}{1990}{{Hertz et~al.}}{{}}} +\bibcite{hon_chromasig:_2008}{{47}{2008}{{Hon et~al.}}{{}}} +\bibcite{ioshikhes_variety_2011}{{48}{2011}{{Ioshikhes et~al.}}{{}}} +\bibcite{isakova_smile-seq_2017}{{49}{2017}{{Isakova et~al.}}{{}}} +\bibcite{jolma_multiplexed_2010}{{50}{2010}{{Jolma et~al.}}{{}}} +\bibcite{jolma_dna-binding_2013}{{51}{2013}{{Jolma et~al.}}{{}}} +\bibcite{kent_blatblast-like_2002}{{52}{2002}{{Kent}}{{}}} +\bibcite{khan_jaspar_2018}{{53}{2018}{{Khan et~al.}}{{}}} +\bibcite{kiselev_sc3:_2017}{{54}{2017}{{Kiselev et~al.}}{{}}} +\bibcite{kulakovskiy_hocomoco:_2018}{{55}{2018}{{Kulakovskiy et~al.}}{{}}} +\bibcite{kulakovskiy_hocomoco:_2016}{{56}{2016}{{Kulakovskiy et~al.}}{{}}} 
+\bibcite{kundaje_ubiquitous_2012}{{57}{2012}{{Kundaje et~al.}}{{}}} +\bibcite{kurotaki_transcriptional_2017}{{58}{2017}{{Kurotaki et~al.}}{{}}} +\bibcite{langmead_fast_2012}{{59}{2012}{{Langmead and Salzberg}}{{}}} +\bibcite{langmead_ultrafast_2009}{{60}{2009}{{Langmead et~al.}}{{}}} +\bibcite{li_sequence_2009}{{61}{2009}{{Li et~al.}}{{}}} +\bibcite{li_identification_2019}{{62}{2019}{{Li et~al.}}{{}}} +\bibcite{lizio_gateways_2015}{{63}{2015}{{Lizio et~al.}}{{}}} +\bibcite{losada_cohesin_2014}{{64}{2014}{{Losada}}{{}}} +\bibcite{maerkl_systems_2007}{{65}{2007}{{Maerkl and Quake}}{{}}} +\bibcite{maier_early_2004}{{66}{2004}{{Maier et~al.}}{{}}} +\bibcite{marsland_machine_2015-1}{{67}{2015}{{Marsland}}{{}}} +\bibcite{mathelier_jaspar_2014}{{68}{2014}{{Mathelier et~al.}}{{}}} +\bibcite{nair_probabilistic_2014}{{69}{2014}{{Nair et~al.}}{{}}} +\bibcite{neph_expansive_2012}{{70}{2012}{{Neph et~al.}}{{}}} +\bibcite{nielsen_catchprofiles:_2012}{{71}{2012}{{Nielsen et~al.}}{{}}} +\bibcite{ong_ctcf:_2014}{{72}{2014}{{Ong and Corces}}{{}}} +\bibcite{orenstein_comparative_2014}{{73}{2014}{{Orenstein and Shamir}}{{}}} +\bibcite{ou_motifstack_2018}{{74}{2018}{{Ou et~al.}}{{}}} +\bibcite{pizzi_fast_2008}{{75}{2008}{{Pizzi and Ukkonen}}{{}}} +\bibcite{pollard_detection_2010}{{76}{2010}{{Pollard et~al.}}{{}}} +\bibcite{quinlan_bedtools:_2010}{{77}{2010}{{Quinlan and Hall}}{{}}} +\bibcite{raney_track_2014}{{78}{2014}{{Raney et~al.}}{{}}} +\bibcite{rico_comparative_2017}{{79}{2017}{{Rico et~al.}}{{}}} +\bibcite{roadmap_epigenomics_consortium_integrative_2015}{{80}{2015}{{{Roadmap Epigenomics Consortium} et~al.}}{{}}} +\bibcite{rustici_arrayexpress_2013}{{81}{2013}{{Rustici et~al.}}{{}}} +\bibcite{schones_statistical_2007}{{82}{2007}{{Schones et~al.}}{{}}} +\bibcite{schutz_mamot:_2008}{{83}{2008}{{Sch\IeC {\"u}tz and Delorenzi}}{{}}} +\bibcite{siepel_evolutionarily_2005}{{84}{2005}{{Siepel et~al.}}{{}}} +\bibcite{soufi_pioneer_2015}{{85}{2015}{{Soufi et~al.}}{{}}} 
+\bibcite{stedman_cohesins_2008}{{86}{2008}{{Stedman et~al.}}{{}}} +\bibcite{trifonov_cracking_2011}{{87}{2011}{{Trifonov}}{{}}} +\bibcite{turatsinze_using_2008}{{88}{2008}{{Turatsinze et~al.}}{{}}} +\bibcite{vierstra_genomic_2016}{{89}{2016}{{Vierstra and Stamatoyannopoulos}}{{}}} +\bibcite{voss_dynamic_2014}{{90}{2014}{{Voss and Hager}}{{}}} +\bibcite{wang_sequence_2012}{{91}{2012}{{Wang et~al.}}{{}}} +\bibcite{weirauch_evaluation_2013}{{92}{2013}{{Weirauch et~al.}}{{}}} +\bibcite{wu_biogps:_2016}{{93}{2016}{{Wu et~al.}}{{}}} +\bibcite{zaret_pioneer_2011}{{94}{2011}{{Zaret and Carroll}}{{}}} +\bibcite{zhang_canonical_2014}{{95}{2014}{{Zhang et~al.}}{{}}} +\bibcite{zhao_tred:_2005}{{96}{2005}{{Zhao et~al.}}{{}}} +\bibcite{zhao_inferring_2009}{{97}{2009}{{Zhao et~al.}}{{}}} +\@writefile{toc}{\contentsline {chapter}{Bibliography}{113}{appendix*.63}} \@setckpt{tail/biblio}{ -\setcounter{page}{112} +\setcounter{page}{114} \setcounter{equation}{0} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{1} \setcounter{section}{1} \setcounter{subsection}{0} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{19} \setcounter{table}{0} -\setcounter{NAT@ctr}{98} +\setcounter{NAT@ctr}{97} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{39} \setcounter{algocfline}{3} \setcounter{algocfproc}{3} \setcounter{algocf}{3} 
\setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/tail/cv.aux b/tail/cv.aux index 53753dc..21059c7 100644 --- a/tail/cv.aux +++ b/tail/cv.aux @@ -1,49 +1,49 @@ \relax \providecommand\hyper@newdestlabel[2]{} -\@writefile{toc}{\contentsline {chapter}{Curriculum Vitae}{113}{section*.64}} +\@writefile{toc}{\contentsline {chapter}{Curriculum Vitae}{115}{section*.64}} \@setckpt{tail/cv}{ -\setcounter{page}{115} +\setcounter{page}{117} \setcounter{equation}{0} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{1} \setcounter{section}{1} \setcounter{subsection}{0} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{19} \setcounter{table}{0} -\setcounter{NAT@ctr}{98} +\setcounter{NAT@ctr}{97} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{39} \setcounter{algocfline}{3} \setcounter{algocfproc}{3} \setcounter{algocf}{3} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} }