Page MenuHomec4science

ch_atac-seq.aux
No OneTemporary

File Metadata

Created
Fri, May 31, 19:20

ch_atac-seq.aux

\relax
\providecommand\hyper@newdestlabel[2]{}
\citation{neph_expansive_2012}
\citation{berest_quantification_2018}
\citation{grossman_positional_2018}
\@writefile{toc}{\contentsline {chapter}{\numberline {5}Chromatin accessibility of monocytes}{67}{chapter.5}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{loa}{\addvspace {10\p@ }}
\newlabel{atac_seq}{{5}{67}{Chromatin accessibility of monocytes}{chapter.5}{}}
\@writefile{chapter}{\contentsline {toc}{Chromatin accessibility of monocytes}{67}{chapter.5}}
\@writefile{toc}{\contentsline {section}{\numberline {5.1}Monitoring TF binding}{67}{section.5.1}}
\citation{angerer_single_2017}
\@writefile{toc}{\contentsline {section}{\numberline {5.2}The advent of single cell DGF}{68}{section.5.2}}
\@writefile{toc}{\contentsline {section}{\numberline {5.3}Open issues}{68}{section.5.3}}
\@writefile{toc}{\contentsline {section}{\numberline {5.4}Data}{68}{section.5.4}}
\citation{hon_chromasig:_2008}
\citation{nielsen_catchprofiles:_2012}
\citation{kundaje_ubiquitous_2012}
\citation{nair_probabilistic_2014}
\citation{groux_spar-k:_2019}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\@writefile{toc}{\contentsline {section}{\numberline {5.5}Identifying over-represented signals}{69}{section.5.5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.5.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{69}{subsection.5.5.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.5.2}EMSequence : an algorithm to identify over-represented sequences}{69}{subsection.5.5.2}}
\@writefile{lof}{\contentsline {figure}{\numberline {5.1}{\ignorespaces \textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data as having been sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure. The EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }}{70}{figure.caption.33}}
\newlabel{atac_seq_em}{{5.1}{70}{\textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data as having been sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.\\ The EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }{figure.caption.33}{}}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\@writefile{toc}{\contentsline {subsubsection}{without shift and flip}{71}{figure.caption.33}}
\newlabel{atac_seq_emseq_likelihood}{{5.1}{71}{without shift and flip}{equation.5.5.1}{}}
\newlabel{atac_seq_emseq_update_model}{{5.2}{71}{without shift and flip}{equation.5.5.2}{}}
\@writefile{toc}{\contentsline {subsubsection}{with shift and flip}{71}{equation.5.5.2}}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\newlabel{atac_seq_emseq_likelihood_shift_flip}{{5.3}{72}{with shift and flip}{equation.5.5.3}{}}
\newlabel{atac_seq_emseq_reverse_motif}{{5.4}{72}{with shift and flip}{equation.5.5.4}{}}
\newlabel{atac_seq_emseq_update_model_shift_flip}{{5.5}{72}{with shift and flip}{equation.5.5.5}{}}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.5.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{73}{subsection.5.5.3}}
\newlabel{atac_seq_emjoint_likelihood}{{5.6}{73}{EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{equation.5.5.6}{}}
\citation{voss_dynamic_2014}
\citation{cirillo_opening_2002,zaret_pioneer_2011,soufi_pioneer_2015}
\citation{buenrostro_transposition_2013}
\citation{buenrostro_transposition_2013}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.5.4}Data realignment}{74}{subsection.5.5.4}}
\@writefile{toc}{\contentsline {section}{\numberline {5.6}Results}{74}{section.5.6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.1}Fragment size analysis}{74}{subsection.5.6.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {5.2}{\ignorespaces \textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line shows the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicate, for each class, the size limit at which the class probability drops below 0.9. With these limits, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragment whose size overlapped the size range spanned by a class was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 million ambiguous and long fragments (>500bp) unassigned.\relax }}{75}{figure.caption.34}}
\newlabel{atac_seq_fragment_size}{{5.2}{75}{\textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line shows the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicate, for each class, the size limit at which the class probability drops below 0.9. With these limits, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragment whose size overlapped the size range spanned by a class was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 million ambiguous and long fragments (>500bp) unassigned.\relax }{figure.caption.34}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {5.3}{\ignorespaces \textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono-nucleosomes (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosome fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, was used. \textbf {Middle row :} each position of the reads was used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragments and the central position of nucleosome fragments were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively. The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }}{76}{figure.caption.35}}
\newlabel{atac_seq_ctcf_all_data}{{5.3}{76}{\textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono-nucleosomes (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosome fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, was used. \textbf {Middle row :} each position of the reads was used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragments and the central position of nucleosome fragments were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }{figure.caption.35}{}}
\citation{adey_rapid_2010}
\citation{buenrostro_transposition_2013,li_identification_2019}
\@writefile{lof}{\contentsline {figure}{\numberline {5.4}{\ignorespaces \textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }}{77}{figure.caption.36}}
\newlabel{atac_seq_ctcf_sp1_myc_ebf1_footprint}{{5.4}{77}{\textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }{figure.caption.36}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.2}Measuring open chromatin and nucleosome occupancy}{77}{subsection.5.6.2}}
\citation{neph_expansive_2012}
\citation{fu_insulator_2008}
\citation{neph_expansive_2012}
\citation{kundaje_ubiquitous_2012}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.3}Evaluation of EMSequence and ChIPPartitioning}{79}{subsection.5.6.3}}
\@writefile{toc}{\contentsline {subsubsection}{EMSequence}{79}{subsection.5.6.3}}
\citation{kent_blatblast-like_2002}
\citation{chatr-aryamontri_biogrid_2017}
\citation{castro-mondragon_rsat_2017}
\@writefile{lof}{\contentsline {figure}{\numberline {5.5}{\ignorespaces \textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the area under the ROC curve (AUC). The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red line indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }}{80}{figure.caption.37}}
\newlabel{atac_seq_emseq_auc_roc}{{5.5}{80}{\textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the area under the ROC curve (AUC). The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red line indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }{figure.caption.37}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {5.6}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop the motifs indicate tandem arrangements of SP1 motifs.\relax }}{81}{figure.caption.38}}
\newlabel{atac_seq_emseq_sp1_10class}{{5.6}{81}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop the motifs indicate tandem arrangements of SP1 motifs.\relax }{figure.caption.38}{}}
\citation{nair_probabilistic_2014}
\@writefile{lof}{\contentsline {figure}{\numberline {5.7}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifting but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signals around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{82}{figure.caption.39}}
\newlabel{atac_seq_emread_ctcf_noshift_flip}{{5.7}{82}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifting but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signals around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.39}{}}
\@writefile{toc}{\contentsline {subsubsection}{ChIPPartitioning}{82}{figure.caption.38}}
\@writefile{lof}{\contentsline {figure}{\numberline {5.8}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifting and with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signals around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{83}{figure.caption.40}}
\newlabel{atac_seq_emread_ctcf_shift_flip}{{5.8}{83}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifting and with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signals around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.40}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5.7}Aligning the binding sites}{84}{section.5.7}}
\@writefile{lof}{\contentsline {figure}{\numberline {5.9}{\ignorespaces \textbf {Central parts of the extended sequence and chromatin models} found in 10'000 monocyte regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inset.\relax }}{85}{figure.caption.41}}
\newlabel{atac_seq_23class}{{5.9}{85}{\textbf {Central parts of the extended sequence and chromatin models} found in 10'000 monocyte regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inset.\relax }{figure.caption.41}{}}
\citation{kurotaki_transcriptional_2017,rico_comparative_2017}
\citation{castro-mondragon_rsat_2017}
\@writefile{lof}{\contentsline {figure}{\numberline {5.10}{\ignorespaces \textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inset.\relax }}{87}{figure.caption.42}}
\newlabel{atac_seq_ctcf_subclass}{{5.10}{87}{\textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inset.\relax }{figure.caption.42}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5.8}Exploring individual TF classes}{87}{section.5.8}}
\citation{marsland_machine_2015-1}
\citation{fan_characterizing_2016,kiselev_sc3:_2017}
\citation{aibar_scenic:_2017}
\citation{gonzalez-blas_cistopic:_2019}
\@writefile{toc}{\contentsline {section}{\numberline {5.9}Discussions}{88}{section.5.9}}
\@writefile{toc}{\contentsline {section}{\numberline {5.10}Perspectives}{88}{section.5.10}}
\@writefile{toc}{\contentsline {section}{\numberline {5.11}Methods}{89}{section.5.11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.11.1}Partitioning programs}{89}{subsection.5.11.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.11.2}Fragment classes}{89}{subsection.5.11.2}}
\newlabel{atac_seq_fragment_length_class}{{5.7}{89}{Fragment classes}{equation.5.11.7}{}}
\citation{castro-mondragon_rsat_2017}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.11.3}Simulated sequences}{90}{subsection.5.11.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.11.4}Realignment using JASPAR motifs}{90}{subsection.5.11.4}}
\@writefile{lot}{\contentsline {table}{\numberline {5.1}{\ignorespaces \textbf {TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. ``TFs covered'' refers to all TFs whose models are children of the given node. ``Name'' refers to the label by which this model is referred to in the text and figures.\relax }}{91}{table.caption.43}}
\newlabel{atac_seq_motif_table}{{5.1}{91}{\textbf {TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. ``TFs covered'' refers to all TFs whose models are children of the given node. ``Name'' refers to the label by which this model is referred to in the text and figures.\relax }{table.caption.43}{}}
\citation{nair_probabilistic_2014}
\citation{dalton_clustering_2009}
\citation{nair_probabilistic_2014}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.11.5}Model extension}{92}{subsection.5.11.5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.11.6}Extracting data assigned to a class}{92}{subsection.5.11.6}}
\newlabel{encode_peaks_algo_ndr_extend}{{3}{94}{Extracting data assigned to a class}{algocfline.3}{}}
\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Computes a matrix containing the data assigned to a given class $S$.\relax }}{94}{algocf.3}}
\newlabel{atac_seq_algo_extract_class}{{3}{94}{Extracting data assigned to a class}{algocf.3}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.11.7}Peak processing}{95}{subsection.5.11.7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.11.8}Per TF classes}{95}{subsection.5.11.8}}
\newlabel{atac_seq_method_per_tf_class}{{5.11.8}{95}{Per TF classes}{subsection.5.11.8}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.11.9}Per TF sub-classes}{95}{subsection.5.11.9}}
\@setckpt{main/ch_atac-seq}{
\setcounter{page}{97}
\setcounter{equation}{7}
\setcounter{enumi}{13}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{chapter}{5}
\setcounter{section}{11}
\setcounter{subsection}{9}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{10}
\setcounter{table}{1}
\setcounter{NAT@ctr}{0}
\setcounter{FBcaption@count}{0}
\setcounter{ContinuedFloat}{0}
\setcounter{KVtest}{0}
\setcounter{subfigure}{0}
\setcounter{subfigure@save}{0}
\setcounter{lofdepth}{1}
\setcounter{subtable}{0}
\setcounter{subtable@save}{0}
\setcounter{lotdepth}{1}
\setcounter{lips@count}{2}
\setcounter{lstnumber}{1}
\setcounter{Item}{13}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{0}
\setcounter{AM@survey}{0}
\setcounter{ttlp@side}{0}
\setcounter{myparts}{0}
\setcounter{parentequation}{0}
\setcounter{AlgoLine}{39}
\setcounter{algocfline}{3}
\setcounter{algocfproc}{3}
\setcounter{algocf}{3}
\setcounter{float@type}{8}
\setcounter{nlinenum}{0}
\setcounter{lstlisting}{0}
\setcounter{section@level}{0}
}

Event Timeline