Page MenuHomec4science

appendix.aux
No OneTemporary

File Metadata

Created
Sun, Apr 28, 10:47

appendix.aux

\relax
\providecommand\hyper@newdestlabel[2]{}
\@writefile{toc}{\contentsline {chapter}{\numberline {A}Supplementary material}{111}{appendix.A}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{loa}{\addvspace {10\p@ }}
\@writefile{toc}{\contentsline {section}{\numberline {A.1}ENCODE peaks analysis supplementary material}{112}{section.A.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.1}{\ignorespaces \textbf {Chromatin architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{112}{figure.caption.39}}
\newlabel{suppl_encode_peaks_em_ctcf}{{A.1}{112}{\textbf {Chromatin architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.39}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.2}{\ignorespaces \textbf {Chromatin architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{113}{figure.caption.40}}
\newlabel{suppl_encode_peaks_em_nrf1}{{A.2}{113}{\textbf {Chromatin architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.40}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.3}{\ignorespaces \textbf {Chromatin architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{114}{figure.caption.41}}
\newlabel{suppl_encode_peaks_em_cfos}{{A.3}{114}{\textbf {Chromatin architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.41}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.4}{\ignorespaces \textbf {Chromatin architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{115}{figure.caption.42}}
\newlabel{suppl_encode_peaks_em_max}{{A.4}{115}{\textbf {Chromatin architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.42}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.5}{\ignorespaces \textbf {Chromatin architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{116}{figure.caption.43}}
\newlabel{suppl_encode_peaks_em_brca1}{{A.5}{116}{\textbf {Chromatin architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represents the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parentheses. The following rows contain the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.43}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.6}{\ignorespaces \textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }}{117}{figure.caption.44}}
\newlabel{suppl_encode_peaks_ctcf_ndr}{{A.6}{117}{\textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }{figure.caption.44}{}}
\citation{khan_jaspar_2018}
\citation{khan_jaspar_2018}
\@writefile{lof}{\contentsline {figure}{\numberline {A.7}{\ignorespaces \textbf {JunD motif association} measured around the binding sites of different TFs. For each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odds ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequent than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that do not have their own motif.\relax }}{118}{figure.caption.45}}
\newlabel{suppl_encode_peaks_jund_association}{{A.7}{118}{\textbf {JunD motif association} measured around the binding sites of different TFs. For each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odds ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequent than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that do not have their own motif.\relax }{figure.caption.45}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.8}{\ignorespaces \textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{119}{figure.caption.46}}
\newlabel{suppl_encode_peaks_ebf1_nucl}{{A.8}{119}{\textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.46}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.9}{\ignorespaces \textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }}{119}{figure.caption.47}}
\newlabel{suppl_encode_peaks_ebf1_logo}{{A.9}{119}{\textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }{figure.caption.47}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.10}{\ignorespaces \textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }}{120}{figure.caption.48}}
\newlabel{suppl_encode_peaks_ebf1_chrom}{{A.10}{120}{\textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }{figure.caption.48}{}}
\@writefile{toc}{\contentsline {section}{\numberline {A.2}SPar-K supplementary material}{122}{section.A.2}}
\newlabel{algo_spark}{{3}{122}{SPar-K supplementary material}{algocfline.3}{}}
\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces SPar-K algorithm.\relax }}{122}{algocf.3}}
\newlabel{algo_smooth_outliers}{{4}{123}{SPar-K supplementary material}{algocfline.4}{}}
\@writefile{loa}{\contentsline {algocf}{\numberline {4}{\ignorespaces Smooth the data matrix by removing outliers.\relax }}{123}{algocf.4}}
\newlabel{algo_distance_fast}{{5}{125}{SPar-K supplementary material}{algocfline.5}{}}
\@writefile{loa}{\contentsline {algocf}{\numberline {5}{\ignorespaces Fast algorithm to compute the correlation distance with shift and flip\relax }}{125}{algocf.5}}
\newlabel{initialize_algo}{{6}{126}{SPar-K supplementary material}{algocfline.6}{}}
\@writefile{loa}{\contentsline {algocf}{\numberline {6}{\ignorespaces A routine of distanceFast() that initializes all the necessary variables. This function can access and modify variables in distanceFast().\relax }}{126}{algocf.6}}
\@writefile{loa}{\contentsline {algocf}{\numberline {7}{\ignorespaces A routine of distanceFast() computing all distances with $X$ having a shift of 0. This function can access and modify all variables declared in distanceFast().\relax }}{128}{algocf.7}}
\@writefile{loa}{\contentsline {algocf}{\numberline {8}{\ignorespaces A routine of distanceFast() computing all distances with $Y$ having a shift of 0. This function can access and modify all variables declared in distanceFast().\relax }}{130}{algocf.8}}
\@writefile{loa}{\contentsline {algocf}{\numberline {9}{\ignorespaces A routine of distanceFast() computing all remaining distances between $X$ and $Y$. This function can access and modify all variables declared in distanceFast().\relax }}{132}{algocf.9}}
\newlabel{algo_seed_random}{{10}{133}{SPar-K supplementary material}{algocfline.10}{}}
\@writefile{loa}{\contentsline {algocf}{\numberline {10}{\ignorespaces Random seeding algorithm\relax }}{133}{algocf.10}}
\citation{jolma_dna-binding_2013}
\citation{jolma_dna-binding_2013}
\newlabel{algo_seed_kmeans++}{{11}{134}{SPar-K supplementary material}{algocfline.11}{}}
\@writefile{loa}{\contentsline {algocf}{\numberline {11}{\ignorespaces Kmeans++ seeding algorithm.\relax }}{134}{algocf.11}}
\citation{buenrostro_transposition_2013}
\citation{buenrostro_transposition_2013}
\@writefile{toc}{\contentsline {section}{\numberline {A.3}SMiLE-seq supplementary material}{135}{section.A.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.11}{\ignorespaces \textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLE-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }}{135}{figure.caption.49}}
\newlabel{suppl_smileseq_auc_2}{{A.11}{135}{\textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLE-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }{figure.caption.49}{}}
\@writefile{toc}{\contentsline {section}{\numberline {A.4}Chromatin accessibility of monocytes supplementary material}{135}{section.A.4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.4.1}Fragment size analysis}{135}{subsection.A.4.1}}
\newlabel{suppl_atac_seq_fragment_size}{{A.4.1}{135}{Fragment size analysis}{subsection.A.4.1}{}}
\citation{adey_rapid_2010}
\citation{buenrostro_transposition_2013,li_identification_2019}
\@writefile{lof}{\contentsline {figure}{\numberline {A.12}{\ignorespaces \textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line shows the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicate, for each class, the size limit at which the class probability drops below 0.9. With these limits, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragment whose size overlapped the size range spanned by a class was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 million ambiguous and long fragments (>500bp) unassigned.\relax }}{136}{figure.caption.50}}
\newlabel{atac_seq_fragment_size}{{A.12}{136}{\textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line shows the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicate, for each class, the size limit at which the class probability drops below 0.9. With these limits, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragment whose size overlapped the size range spanned by a class was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 million ambiguous and long fragments (>500bp) unassigned.\relax }{figure.caption.50}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.4.2}Measuring open chromatin and nucleosome occupancy}{136}{subsection.A.4.2}}
\newlabel{suppl_atac_seq_measuring_signal}{{A.4.2}{136}{Measuring open chromatin and nucleosome occupancy}{subsection.A.4.2}{}}
\citation{neph_expansive_2012}
\citation{fu_insulator_2008}
\citation{neph_expansive_2012}
\@writefile{lof}{\contentsline {figure}{\numberline {A.13}{\ignorespaces \textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively. The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }}{137}{figure.caption.51}}
\newlabel{atac_seq_ctcf_all_data}{{A.13}{137}{\textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }{figure.caption.51}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.14}{\ignorespaces \textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites (see section \ref {atac_seq_method_pwmscan}). For each TF, the open chromatin accessibility was measured (red) as well as the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }}{138}{figure.caption.52}}
\newlabel{atac_seq_ctcf_sp1_myc_ebf1_footprint}{{A.14}{138}{\textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites (see section \ref {atac_seq_method_pwmscan}). For each TF, the open chromatin accessibility was measured (red) as well as the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }{figure.caption.52}{}}
\citation{kundaje_ubiquitous_2012}
\citation{ou_motifstack_2018}
\citation{ou_motifstack_2018}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.4.3}Evaluation of EMSequence and ChIPPartitioning}{139}{subsection.A.4.3}}
\newlabel{suppl_eval_emseq_chippartitioning}{{A.4.3}{139}{Evaluation of EMSequence and ChIPPartitioning}{subsection.A.4.3}{}}
\@writefile{toc}{\contentsline {subsubsection}{EMSequence}{139}{subsection.A.4.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.15}{\ignorespaces \textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motif tree and alignment were built using the motifStack R package \citep {ou_motifstack_2018}.\relax }}{140}{figure.caption.53}}
\newlabel{suppl_atac_seq_emseq_best_motifs}{{A.15}{140}{\textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motif tree and alignment were built using the motifStack R package \citep {ou_motifstack_2018}.\relax }{figure.caption.53}{}}
\citation{kent_blatblast-like_2002}
\citation{chatr-aryamontri_biogrid_2017}
\citation{castro-mondragon_rsat_2017}
\@writefile{lof}{\contentsline {figure}{\numberline {A.16}{\ignorespaces \textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red line indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }}{141}{figure.caption.54}}
\newlabel{suppl_atac_seq_emseq_auc_roc}{{A.16}{141}{\textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red line indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }{figure.caption.54}{}}
\citation{nair_probabilistic_2014}
\@writefile{lof}{\contentsline {figure}{\numberline {A.17}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicate tandem arrangements of SP1 motifs.\relax }}{142}{figure.caption.55}}
\newlabel{suppl_atac_seq_emseq_sp1_7class}{{A.17}{142}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicate tandem arrangements of SP1 motifs.\relax }{figure.caption.55}{}}
\@writefile{toc}{\contentsline {subsubsection}{ChIPPartitioning}{142}{figure.caption.56}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.18}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicate head-to-tail arrangements of SP1 motifs.\relax }}{143}{figure.caption.56}}
\newlabel{suppl_atac_seq_emseq_sp1_10class}{{A.18}{143}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicate head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.56}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.19}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifting but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{144}{figure.caption.57}}
\newlabel{suppl_atac_seq_emread_ctcf_noshift_flip}{{A.19}{144}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifting but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.57}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.20}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{145}{figure.caption.58}}
\newlabel{suppl_atac_seq_emread_sp1_noshift_flip}{{A.20}{145}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.58}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.4.4}Other supplementary figures}{145}{subsection.A.4.4}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.23}{\ignorespaces \textbf {Extended sequence and chromatin models} found in monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{145}{figure.caption.61}}
\newlabel{suppl_atac_seq_23class}{{A.23}{145}{\textbf {Extended sequence and chromatin models} found in monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.61}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.21}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{146}{figure.caption.59}}
\newlabel{suppl_atac_seq_emread_ctcf_shift_flip}{{A.21}{146}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.59}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.24}{\ignorespaces \textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{146}{figure.caption.62}}
\newlabel{suppl_atac_seq_pu1_subclass}{{A.24}{146}{\textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.62}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.22}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{147}{figure.caption.60}}
\newlabel{suppl_atac_seq_emread_sp1_shift_flip}{{A.22}{147}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.60}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {A.25}{\ignorespaces \textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{147}{figure.caption.63}}
\newlabel{suppl_atac_seq_ap1_subclass}{{A.25}{147}{\textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.63}{}}
\@setckpt{tail/appendix}{
\setcounter{page}{148}
\setcounter{equation}{0}
\setcounter{enumi}{8}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{chapter}{1}
\setcounter{section}{4}
\setcounter{subsection}{4}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{25}
\setcounter{table}{0}
\setcounter{NAT@ctr}{0}
\setcounter{FBcaption@count}{0}
\setcounter{ContinuedFloat}{0}
\setcounter{KVtest}{0}
\setcounter{subfigure}{0}
\setcounter{subfigure@save}{0}
\setcounter{lofdepth}{1}
\setcounter{subtable}{0}
\setcounter{subtable@save}{0}
\setcounter{lotdepth}{1}
\setcounter{lips@count}{2}
\setcounter{lstnumber}{1}
\setcounter{Item}{8}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{0}
\setcounter{AM@survey}{0}
\setcounter{ttlp@side}{0}
\setcounter{myparts}{0}
\setcounter{parentequation}{0}
\setcounter{AlgoLine}{23}
\setcounter{algocfline}{11}
\setcounter{algocfproc}{11}
\setcounter{algocf}{11}
\setcounter{float@type}{8}
\setcounter{nlinenum}{0}
\setcounter{lstlisting}{0}
\setcounter{section@level}{0}
}

Event Timeline