Page MenuHomec4science

ch_atac-seq.aux
No OneTemporary

File Metadata

Created
Tue, May 14, 11:40

ch_atac-seq.aux

\relax
\providecommand\hyper@newdestlabel[2]{}
\citation{vierstra_genomic_2016}
\citation{neph_expansive_2012}
\citation{adey_rapid_2010,buenrostro_transposition_2013}
\citation{barski_high-resolution_2007}
\citation{vierstra_genomic_2016}
\citation{vierstra_genomic_2016}
\citation{adey_rapid_2010,buenrostro_transposition_2013}
\citation{adey_rapid_2010}
\citation{adey_rapid_2010}
\@writefile{toc}{\contentsline {chapter}{\numberline {4}Chromatin accessibility of monocytes}{57}{chapter.4}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{loa}{\addvspace {10\p@ }}
\@writefile{chapter}{\contentsline {toc}{Chromatin accessibility of monocytes}{57}{chapter.4}}
\@writefile{toc}{\contentsline {section}{\numberline {4.1}ATAC-seq}{57}{section.4.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces \textbf {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows sequencing adaptors to be added. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legend taken and adapted from \citep {vierstra_genomic_2016}.\relax }}{58}{figure.caption.31}}
\newlabel{atac_seq_atac_seq}{{4.1}{58}{\textbf {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows sequencing adaptors to be added. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legend taken and adapted from \citep {vierstra_genomic_2016}.\relax }{figure.caption.31}{}}
\citation{neph_expansive_2012}
\citation{berest_quantification_2018}
\citation{grossman_positional_2018}
\@writefile{toc}{\contentsline {section}{\numberline {4.2}Monitoring TF binding}{59}{section.4.2}}
\citation{angerer_single_2017}
\citation{fan_characterizing_2016,kiselev_sc3:_2017}
\citation{aibar_scenic:_2017}
\citation{gonzalez-blas_cistopic:_2019}
\citation{buenrostro_transposition_2013}
\@writefile{toc}{\contentsline {section}{\numberline {4.3}The advent of single cell DGF}{60}{section.4.3}}
\@writefile{toc}{\contentsline {section}{\numberline {4.4}A quick overview of scATAC-seq data analysis}{60}{section.4.4}}
\@writefile{toc}{\contentsline {section}{\numberline {4.5}Open questions}{60}{section.4.5}}
\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces \textbf {framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used as if it were a bulk sequencing experiment. Regions of interest are listed using peak calling on the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.\relax }}{61}{figure.caption.32}}
\newlabel{atac_seq_pipeline}{{4.2}{61}{\textbf {framework to identify chromatin organization and use them to annotate cellular state :} the scATAC-seq data available in each individual cell are aggregated and used as if it were a bulk sequencing experiment. Regions of interest are listed using peak calling on the bulk data. The read densities in these regions (center of the peaks +/- a given offset) are measured. The regions are then clustered based on their signal shape to identify different chromatin architectures and create a catalog. These chromatin signatures can then be used to annotate each region of interest in each cell, based on the signal resemblance. The information can be stored as a matrix (M) that can be used for downstream analyses, such as sub-population identification.\relax }{figure.caption.32}{}}
\citation{hepler_10x_2018}
\citation{hon_chromasig:_2008}
\citation{nielsen_catchprofiles:_2012}
\citation{kundaje_ubiquitous_2012}
\citation{nair_probabilistic_2014}
\citation{groux_spar-k:_2019}
\@writefile{toc}{\contentsline {section}{\numberline {4.6}Data}{62}{section.4.6}}
\@writefile{toc}{\contentsline {section}{\numberline {4.7}Identification of catalog of chromatin architectures}{62}{section.4.7}}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.1}EMRead : an algorithm to identify over-represented chromatin architecture}{63}{subsection.4.7.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces \textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of EMRead, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data as having been sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure. EMJoint algorithm is the combination of both EMRead and EMSequence at the same time.\relax }}{63}{figure.caption.33}}
\newlabel{atac_seq_em}{{4.3}{63}{\textbf {Illustration of the expectation-maximization algorithms}\\ \textbf {A} illustration of EMRead, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}.\\ \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data as having been sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.\\ EMJoint algorithm is the combination of both EMRead and EMSequence at the same time.\relax }{figure.caption.33}{}}
\citation{nair_probabilistic_2014}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.2}EMSequence : an algorithm to identify over-represented sequences}{64}{subsection.4.7.2}}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\@writefile{toc}{\contentsline {subsubsection}{without shift and flip}{65}{subsection.4.7.2}}
\newlabel{atac_seq_emseq_likelihood}{{4.1}{65}{without shift and flip}{equation.4.7.1}{}}
\newlabel{atac_seq_emseq_update_model}{{4.2}{65}{without shift and flip}{equation.4.7.2}{}}
\@writefile{toc}{\contentsline {subsubsection}{with shift and flip}{65}{equation.4.7.2}}
\newlabel{atac_seq_emseq_likelihood_shift_flip}{{4.3}{65}{with shift and flip}{equation.4.7.3}{}}
\newlabel{atac_seq_emseq_reverse_motif}{{4.4}{65}{with shift and flip}{equation.4.7.4}{}}
\newlabel{atac_seq_emseq_update_model_shift_flip}{{4.5}{66}{with shift and flip}{equation.4.7.5}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{66}{subsection.4.7.3}}
\citation{nair_probabilistic_2014}
\citation{nair_probabilistic_2014}
\newlabel{atac_seq_emjoint_likelihood}{{4.6}{67}{EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{equation.4.7.6}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.4}Data realignment}{67}{subsection.4.7.4}}
\citation{voss_dynamic_2014}
\citation{cirillo_opening_2002,zaret_pioneer_2011,soufi_pioneer_2015}
\citation{buenrostro_transposition_2013}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.7.5}Implementations}{68}{subsection.4.7.5}}
\@writefile{toc}{\contentsline {section}{\numberline {4.8}Results}{68}{section.4.8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.8.1}Fragment size analysis}{68}{subsection.4.8.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces \textbf {Fragment size analysis} \textbf {A :} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. The 10bp oscillation reflects the DNA pitch. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line shows the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicate, for each class, the size limit at which the class probability drops below 0.9. With these limits, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragment whose size overlapped the size range spanned by a class was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 million ambiguous and long fragments (>500bp) unassigned.\relax }}{69}{figure.caption.34}}
\newlabel{atac_seq_fragment_size}{{4.4}{69}{\textbf {Fragment size analysis} \textbf {A :} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. The 10bp oscillation reflects the DNA pitch.\\ A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line shows the sum of the three classes.\\ \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicate, for each class, the size limit at which the class probability drops below 0.9. With these limits, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp.\\ \textbf {C :} final fragment classes. Each fragment whose size overlapped the size range spanned by a class was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 million ambiguous and long fragments (>500bp) unassigned.\relax }{figure.caption.34}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces \textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively. The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }}{70}{figure.caption.35}}
\newlabel{atac_seq_ctcf_all_data}{{4.5}{70}{\textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }{figure.caption.35}{}}
\citation{buenrostro_transposition_2013}
\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces \textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned using one PWM per TF. For each TF, the open chromatin architecture was measured by considering the corrected read edges (red) and the nucleosome occupancy (blue) by considering the center of the nucleosome fragments from the nucleosome fragment dataset. The motif location is indicated by the dashed lines.\relax }}{71}{figure.caption.36}}
\newlabel{atac_seq_ctcf_sp1_myc_ebf1_footprint}{{4.6}{71}{\textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned using one PWM per TF. For each TF, the open chromatin architecture was measured by considering the corrected read edges (red) and the nucleosome occupancy (blue) by considering the center of the nucleosome fragments from the nucleosome fragment dataset. The motif location is indicated by the dashed lines.\relax }{figure.caption.36}{}}
\citation{adey_rapid_2010}
\citation{buenrostro_transposition_2013,li_identification_2019}
\citation{neph_expansive_2012}
\citation{fu_insulator_2008}
\citation{neph_expansive_2012}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.8.2}Measuring open chromatin and nucleosome occupancy}{72}{subsection.4.8.2}}
\citation{kundaje_ubiquitous_2012}
\citation{nair_probabilistic_2014}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.8.3}Evaluation of EMRead and EMSequence}{73}{subsection.4.8.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {4.7}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs :} EMRead was run without shifting but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{74}{figure.caption.37}}
\newlabel{atac_seq_emread_ctcf_noshift_flip}{{4.7}{74}{\textbf {Open chromatin classes around CTCF motifs :} EMRead was run without shifting but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.37}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4.8}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs :} EMRead was run with shifting and with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{74}{figure.caption.38}}
\newlabel{atac_seq_emread_ctcf_shift_flip}{{4.8}{74}{\textbf {Open chromatin classes around CTCF motifs :} EMRead was run with shifting and with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.38}{}}
\@writefile{toc}{\contentsline {subsubsection}{EMRead}{75}{subsection.4.8.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {4.9}{\ignorespaces \textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red line indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }}{76}{figure.caption.39}}
\newlabel{atac_seq_emseq_auc_roc}{{4.9}{76}{\textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red line indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }{figure.caption.39}{}}
\@writefile{toc}{\contentsline {subsubsection}{EMSequence}{76}{figure.caption.38}}
\@writefile{lof}{\contentsline {figure}{\numberline {4.10}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop the motifs indicate tandem arrangements of SP1 motifs.\relax }}{77}{figure.caption.40}}
\newlabel{atac_seq_emseq_sp1_10class}{{4.10}{77}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop the motifs indicate tandem arrangements of SP1 motifs.\relax }{figure.caption.40}{}}
\citation{chatr-aryamontri_biogrid_2017}
\citation{castro-mondragon_rsat_2017}
\@setckpt{main/ch_atac-seq}{
\setcounter{page}{79}
\setcounter{equation}{6}
\setcounter{enumi}{13}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{chapter}{4}
\setcounter{section}{8}
\setcounter{subsection}{3}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{10}
\setcounter{table}{0}
\setcounter{NAT@ctr}{0}
\setcounter{FBcaption@count}{0}
\setcounter{ContinuedFloat}{0}
\setcounter{KVtest}{0}
\setcounter{subfigure}{0}
\setcounter{subfigure@save}{0}
\setcounter{lofdepth}{1}
\setcounter{subtable}{0}
\setcounter{subtable@save}{0}
\setcounter{lotdepth}{1}
\setcounter{lips@count}{2}
\setcounter{lstnumber}{1}
\setcounter{Item}{13}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{0}
\setcounter{AM@survey}{0}
\setcounter{ttlp@side}{0}
\setcounter{myparts}{0}
\setcounter{parentequation}{0}
\setcounter{AlgoLine}{17}
\setcounter{algocfline}{2}
\setcounter{algocfproc}{2}
\setcounter{algocf}{2}
\setcounter{float@type}{8}
\setcounter{nlinenum}{0}
\setcounter{lstlisting}{0}
\setcounter{section@level}{0}
}

Event Timeline