diff --git a/images/ch_atac-seq/data_classCTCF_8class.png b/images/ch_atac-seq/data_classCTCF_8class.png index 8d8c63a..d224780 100644 Binary files a/images/ch_atac-seq/data_classCTCF_8class.png and b/images/ch_atac-seq/data_classCTCF_8class.png differ diff --git a/images/ch_atac-seq/data_classPU.1_2class.png b/images/ch_atac-seq/data_classPU.1_2class.png index 87d3c6b..0b1ad38 100644 Binary files a/images/ch_atac-seq/data_classPU.1_2class.png and b/images/ch_atac-seq/data_classPU.1_2class.png differ diff --git a/images/ch_atac-seq/data_classjun_3class.png b/images/ch_atac-seq/data_classjun_3class.png index 8791eb3..943f0fc 100644 Binary files a/images/ch_atac-seq/data_classjun_3class.png and b/images/ch_atac-seq/data_classjun_3class.png differ diff --git a/images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png b/images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png index 4d3ca10..eac673a 100644 Binary files a/images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png and b/images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png differ diff --git a/images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png b/images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png index 74e72a9..6056e6a 100644 Binary files a/images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png and b/images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png differ diff --git a/images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png b/images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png index 860bf77..25a9fa5 100644 Binary files a/images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png and b/images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png differ diff --git a/images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png b/images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png index 1265432..c5e7592 100644 Binary files a/images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png and 
b/images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png differ diff --git a/images/ch_atac-seq/sp1_motifs_6class_shift_flip.png b/images/ch_atac-seq/sp1_motifs_6class_shift_flip.png index a939e6a..c99cad0 100644 Binary files a/images/ch_atac-seq/sp1_motifs_6class_shift_flip.png and b/images/ch_atac-seq/sp1_motifs_6class_shift_flip.png differ diff --git a/images/ch_spark/figure1.pdf b/images/ch_spark/figure1.pdf deleted file mode 100644 index ce84ff3..0000000 Binary files a/images/ch_spark/figure1.pdf and /dev/null differ diff --git a/images/ch_spark/figure1.png b/images/ch_spark/figure1.png new file mode 100644 index 0000000..5cff1c8 Binary files /dev/null and b/images/ch_spark/figure1.png differ diff --git a/images/ch_spark/supplemental_figure1.pdf b/images/ch_spark/supplemental_figure1.pdf deleted file mode 100644 index 90d3ab2..0000000 Binary files a/images/ch_spark/supplemental_figure1.pdf and /dev/null differ diff --git a/images/ch_spark/supplemental_figure1.png b/images/ch_spark/supplemental_figure1.png new file mode 100644 index 0000000..80436fc Binary files /dev/null and b/images/ch_spark/supplemental_figure1.png differ diff --git a/images/ch_spark/supplemental_figure2.pdf b/images/ch_spark/supplemental_figure2.pdf deleted file mode 100644 index 403a02b..0000000 Binary files a/images/ch_spark/supplemental_figure2.pdf and /dev/null differ diff --git a/images/ch_spark/supplemental_figure2.png b/images/ch_spark/supplemental_figure2.png new file mode 100644 index 0000000..c901092 Binary files /dev/null and b/images/ch_spark/supplemental_figure2.png differ diff --git a/images/ch_spark/supplemental_figure4.pdf b/images/ch_spark/supplemental_figure4.pdf deleted file mode 100644 index d1fcc93..0000000 Binary files a/images/ch_spark/supplemental_figure4.pdf and /dev/null differ diff --git a/images/ch_spark/supplemental_figure4.png b/images/ch_spark/supplemental_figure4.png new file mode 100644 index 0000000..417eb23 Binary files /dev/null and 
b/images/ch_spark/supplemental_figure4.png differ diff --git a/images/ch_spark/supplemental_figure5.pdf b/images/ch_spark/supplemental_figure5.pdf deleted file mode 100644 index 8ced377..0000000 Binary files a/images/ch_spark/supplemental_figure5.pdf and /dev/null differ diff --git a/images/ch_spark/supplemental_figure5.png b/images/ch_spark/supplemental_figure5.png new file mode 100644 index 0000000..cca2e5b Binary files /dev/null and b/images/ch_spark/supplemental_figure5.png differ diff --git a/images/ch_spark/supplemental_figure8.pdf b/images/ch_spark/supplemental_figure8.pdf deleted file mode 100644 index af42418..0000000 Binary files a/images/ch_spark/supplemental_figure8.pdf and /dev/null differ diff --git a/images/ch_spark/supplemental_figure8.png b/images/ch_spark/supplemental_figure8.png new file mode 100644 index 0000000..d7a4579 Binary files /dev/null and b/images/ch_spark/supplemental_figure8.png differ diff --git a/main/ch_encode_peaks.aux b/main/ch_encode_peaks.aux index 2216e96..742412c 100644 --- a/main/ch_encode_peaks.aux +++ b/main/ch_encode_peaks.aux @@ -1,160 +1,160 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{consortium_integrated_2012} \citation{cheng_understanding_2012} \citation{cheng_understanding_2012} \citation{mathelier_jaspar_2014} \citation{kulakovskiy_hocomoco:_2016} \citation{jolma_dna-binding_2013} \citation{cheng_understanding_2012} \citation{mathelier_jaspar_2014} \citation{kulakovskiy_hocomoco:_2016} \citation{jolma_dna-binding_2013} \citation{cheng_understanding_2012} \citation{kundaje_ubiquitous_2012} \citation{thurman_accessible_2012} \citation{gerstein_architecture_2012} \@writefile{toc}{\contentsline {chapter}{\numberline {3}ENCODE peaks analysis}{33}{chapter.3}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{loa}{\addvspace {10\p@ }} \newlabel{encode_peaks}{{3}{33}{ENCODE peaks analysis}{chapter.3}{}} \@writefile{toc}{\contentsline {chapter}{ENCODE peaks 
analysis}{33}{chapter.3}} \@writefile{toc}{\contentsline {section}{\numberline {3.1}Data}{33}{section.3.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces \textbf {Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000.\relax }}{34}{figure.caption.15}} -\newlabel{encode_peaks_gm12878_peak_number}{{3.1}{34}{\textbf {Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000.\relax }{figure.caption.15}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces \textbf {Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. 
The horizontal dashed line indicates 0.5.\relax }}{34}{figure.caption.16}} -\newlabel{encode_peaks_gm12878_motif_prop}{{3.2}{34}{\textbf {Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.\relax }{figure.caption.16}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces \textbf {Number of peaks in GM12878} called by ENCODE for each ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000 peaks respectively.\relax }}{34}{figure.caption.15}} +\newlabel{encode_peaks_gm12878_peak_number}{{3.1}{34}{\textbf {Number of peaks in GM12878} called by ENCODE for each ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. 
The horizontal dashed lines indicate 20'000 and 40'000 peaks respectively.\relax }{figure.caption.15}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces \textbf {Proportion of peaks with a motif in GM12878}, for each ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM modeling the TF binding specificity. Each TF was associated to a log-odd PWM taken either from JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. If a motif instance (with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$) could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.\relax }}{34}{figure.caption.16}} +\newlabel{encode_peaks_gm12878_motif_prop}{{3.2}{34}{\textbf {Proportion of peaks with a motif in GM12878}, for each ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM modeling the TF binding specificity. Each TF was associated to a log-odd PWM taken either from JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. If a motif instance (with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$) could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. 
The horizontal dashed line indicates 0.5.\relax }{figure.caption.16}{}} \citation{wu_biogps:_2016} \citation{nair_probabilistic_2014} \@writefile{toc}{\contentsline {section}{\numberline {3.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{35}{section.3.2}} \newlabel{encode_peaks_chippartitioning}{{3.2}{35}{ChIPPartitioning : an algorithm to identify chromatin architectures}{section.3.2}{}} \newlabel{encode_peaks_eq_em_data_model}{{3.1}{35}{ChIPPartitioning : an algorithm to identify chromatin architectures}{equation.3.2.1}{}} \citation{bailey_fitting_1994} \citation{nair_probabilistic_2014} \newlabel{encode_peaks_eq_em_update}{{3.2}{36}{ChIPPartitioning : an algorithm to identify chromatin architectures}{equation.3.2.2}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.2.1}Data realignment}{36}{subsection.3.2.1}} \newlabel{encode_peaks_data_realign}{{3.2.1}{36}{Data realignment}{subsection.3.2.1}{}} \citation{kundaje_ubiquitous_2012} \citation{zhang_canonical_2014} \@writefile{toc}{\contentsline {section}{\numberline {3.3}Nucleosome organization around transcription factor binding sites}{37}{section.3.3}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.3}{\ignorespaces \textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. 
Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represent the proportion of the highest signal for each chromatin pattern.\relax }}{38}{figure.caption.17}} -\newlabel{encode_peaks_array_measure}{{3.3}{38}{\textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. 
MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represent the proportion of the highest signal for each chromatin pattern.\relax }{figure.caption.17}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.3}{\ignorespaces \textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TF binding site using 10bp bins. The TF binding sites were then classified into 4 classes according to their nucleosome patterns using ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). 
The y-axis scale represents the proportion of the highest signal for each chromatin pattern.\relax }}{38}{figure.caption.17}} +\newlabel{encode_peaks_array_measure}{{3.3}{38}{\textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TF binding site using 10bp bins. The TF binding sites were then classified into 4 classes according to their nucleosome patterns using ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). 
The y-axis scale represents the proportion of the highest signal for each chromatin pattern.\relax }{figure.caption.17}{}} \citation{kundaje_ubiquitous_2012,fu_insulator_2008} -\@writefile{toc}{\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{39}{section.3.4}} -\newlabel{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}{{3.4}{39}{The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{section.3.4}{}} \@writefile{lof}{\contentsline {figure}{\numberline {3.4}{\ignorespaces \textbf { Colocalization with CTCF peaks in GM12878 cells : } \textbf {A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). \textbf {B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. \textbf {C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. 
Peaklists are named using the TF together with the laboratory which produced the data.\relax }}{40}{figure.caption.18}} \newlabel{encode_peaks_colocalization_ctcf}{{3.4}{40}{\textbf { Colocalization with CTCF peaks in GM12878 cells : } \textbf {A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). \textbf {B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. \textbf {C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. Peaklists are named using the TF together with the laboratory which produced the data.\relax }{figure.caption.18}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.5}{\ignorespaces \textbf {Nucleosome free region at CTCF binding sites} \textbf {a} The length are represented as boxplots. 
The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }}{41}{figure.caption.19}} -\newlabel{encode_peaks_ctcf_ndr}{{3.5}{41}{\textbf {Nucleosome free region at CTCF binding sites} \textbf {a} The length are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }{figure.caption.19}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.5}{\ignorespaces \textbf {Nucleosome free regions at CTCF binding sites} \textbf {A} The NDR lengths are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SMC3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }}{41}{figure.caption.19}} +\newlabel{encode_peaks_ctcf_ndr}{{3.5}{41}{\textbf {Nucleosome free regions at CTCF binding sites} \textbf {A} The NDR lengths are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SMC3, RAD21, YY1 or ZNF143. 
The number of binding sites in each subgroup is indicated above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }{figure.caption.19}{}} \citation{stedman_cohesins_2008} \citation{losada_cohesin_2014} \citation{donohoe_identification_2007} \citation{bailey_znf143_2015} +\@writefile{toc}{\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{42}{section.3.4}} +\newlabel{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}{{3.4}{42}{The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{section.3.4}{}} \citation{ong_ctcf:_2014,ghirlando_ctcf:_2016} \citation{wang_sequence_2012,neph_expansive_2012,consortium_integrated_2012,guo_high_2012} \citation{chatr-aryamontri_biogrid_2017} \citation{wang_sequence_2012,neph_expansive_2012,consortium_integrated_2012,guo_high_2012} \citation{chatr-aryamontri_biogrid_2017} \citation{ghirlando_ctcf:_2016} \citation{ong_ctcf:_2014} \@writefile{toc}{\contentsline {section}{\numberline {3.5}CTCF and JunD interactomes}{43}{section.3.5}} \@writefile{lof}{\contentsline {figure}{\numberline {3.6}{\ignorespaces \textbf {CTCF motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. 
ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf {B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif. The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref {encode_peaks_methods_data}).\relax }}{44}{figure.caption.20}} \newlabel{encode_peaks_ctcf_association}{{3.6}{44}{\textbf {CTCF motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf {B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif. 
The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref {encode_peaks_methods_data}).\relax }{figure.caption.20}{}} \@writefile{lot}{\contentsline {table}{\numberline {3.1}{\ignorespaces \textbf {Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite {wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep {chatr-aryamontri_biogrid_2017}.\relax }}{45}{table.caption.21}} \newlabel{encode_peaks_association_table}{{3.1}{45}{\textbf {Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' 
to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite {wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep {chatr-aryamontri_biogrid_2017}.\relax }{table.caption.21}{}} \citation{wang_sequence_2012,neph_expansive_2012,consortium_integrated_2012,guo_high_2012,chatr-aryamontri_biogrid_2017} \citation{gaffney_controls_2012} \citation{gaffney_controls_2012} \citation{boller_defining_2018} \citation{hagman_early_2005} \citation{maier_early_2004,boller_pioneering_2016} \@writefile{toc}{\contentsline {section}{\numberline {3.6}EBF1 binds nucleosomes}{47}{section.3.6}} \@writefile{lof}{\contentsline {figure}{\numberline {3.7}{\ignorespaces \textbf {EBF1 binding sites} stand on the edge of a nucleosome. \textbf {A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep {gaffney_controls_2012}. \textbf {B} Dinucleotide frequencies around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. \textbf {C} Motif frequency around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. 
The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{48}{figure.caption.22}} \newlabel{encode_peaks_ebf1}{{3.7}{48}{\textbf {EBF1 binding sites} stand on the edge of a nucleosome. \textbf {A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep {gaffney_controls_2012}. \textbf {B} Dinucleotide frequencies around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. \textbf {C} Motif frequency around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.22}{}} \citation{trifonov_cracking_2011} \citation{ioshikhes_variety_2011,trifonov_cracking_2011,gaffney_controls_2012} \citation{boller_pioneering_2016} \citation{dreos_mga_2018} \citation{gerstein_architecture_2012} \citation{mathelier_jaspar_2014} \citation{kulakovskiy_hocomoco:_2016} \citation{jolma_dna-binding_2013} \@writefile{toc}{\contentsline {section}{\numberline {3.7}Discussion}{50}{section.3.7}} \@writefile{toc}{\contentsline {section}{\numberline {3.8}Methods}{50}{section.3.8}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.1}Data and data processing}{50}{subsection.3.8.1}} \newlabel{encode_peaks_methods_data}{{3.8.1}{50}{Data and data processing}{subsection.3.8.1}{}} \citation{gaffney_controls_2012} \citation{boyle_high-resolution_2008} \citation{dreos_eukaryotic_2017} \citation{siepel_evolutionarily_2005} \citation{ambrosini_chip-seq_2016} \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.2}Classification of MNase patterns}{51}{subsection.3.8.2}} \newlabel{encode_peaks_em_mnase}{{3.8.2}{51}{Classification of MNase 
patterns}{subsection.3.8.2}{}} \citation{nair_probabilistic_2014} \citation{zhang_canonical_2014} \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.3}Quantifying nucleosome array intensity from classification results}{52}{subsection.3.8.3}} \citation{ambrosini_chip-seq_2016} \citation{ambrosini_chip-seq_2016} \newlabel{encode_peaks_equation_shift_density1}{{3.3}{53}{Quantifying nucleosome array intensity from classification results}{equation.3.8.3}{}} \newlabel{encode_peaks_equation_shift_density2}{{3.4}{53}{Quantifying nucleosome array intensity from classification results}{equation.3.8.4}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.4}Peak colocalization}{53}{subsection.3.8.4}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.5}NDR detection}{54}{subsection.3.8.5}} \newlabel{encode_peaks_algo_ndr_extend}{{1}{55}{NDR detection}{algocfline.1}{}} \@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Searches the coordinates of the NDR using the posterior nucleosome and nucleosome free class probabilities, for a region $R_i$, from its central position.\relax }}{55}{algocf.1}} \citation{mathelier_jaspar_2014} \citation{kulakovskiy_hocomoco:_2016} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_chip-seq_2016-1} \citation{ambrosini_chip-seq_2016-1} \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.6}CTCF and JunD interactors}{56}{subsection.3.8.6}} \citation{ambrosini_chip-seq_2016-1} \citation{gaffney_controls_2012} \citation{ambrosini_signal_2003} \@writefile{toc}{\contentsline {subsection}{\numberline {3.8.7}EBF1 and nucleosome}{57}{subsection.3.8.7}} \@setckpt{main/ch_encode_peaks}{ \setcounter{page}{59} \setcounter{equation}{8} \setcounter{enumi}{8} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{3} \setcounter{section}{8} \setcounter{subsection}{7} 
\setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{7} \setcounter{table}{1} \setcounter{NAT@ctr}{0} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{0} \setcounter{lstnumber}{1} \setcounter{Item}{8} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{28} \setcounter{algocfline}{1} \setcounter{algocfproc}{1} \setcounter{algocf}{1} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/main/ch_encode_peaks.tex b/main/ch_encode_peaks.tex index 7d5e981..b1c07a9 100644 --- a/main/ch_encode_peaks.tex +++ b/main/ch_encode_peaks.tex @@ -1,484 +1,484 @@ \cleardoublepage \chapter{ENCODE peaks analysis} \label{encode_peaks} \markboth{ENCODE peaks analysis}{ENCODE peaks analysis} \addcontentsline{toc}{chapter}{ENCODE peaks analysis} % Modeling a TF sequence specificity only allows to partially understand how a TF binds a region. Indeed, scanning a genome using a PWM for putative binding sites often returns tens of thousands of sites with only a subset of them being really occupied within a cell. Other elements such as chromatin organization and composition are likely to drive TF binding. Thus gaining a better understanding about the chromat % The exact mechanisms at play remain unclear but nucleosome occupancy is thought to shelter DNA sequence - as some bases are facing the core octamer or to distort the DNA structure - impeding sequence recognition by TFs. In vivo, evidences for competition between TFs and nucleosomes have been collected. 
Computational simulations accounting for simultaneous multiple factor binding on DNA suggested that nucleosome occupancy and TFs binding influence each other and that TF binds nucleosome depleted regions \cite{wasson_ensemble_2009}. As discussed in Chapter \ref{intro}, the structure of the chromatin has a deep impact on TF binding. It is now clear that nucleosome occupancy fulfills more than a packaging role. It can also acts as a barrier to impede DNA reading processes and compete with TFs for sequence occupancy. Thus gaining a better understanding of how chromatin is organized around TF binding sites is crucial to understand TF binding beyond their sequence specificity only. In an effort to better understand how the genome is organized and how its functions are fulfilled, the ENCODE Consortium \citep{consortium_integrated_2012} released an impressive collection of coherent data representing an unprecedented picture of the chromatin in several human cell lines. The GM12878 cells were chosen as one of the highest priority cell line. GM12878 retained the ability to divide but show a normal karyotype - unlike HeLa cells. Additionally, their genome has been sequences. All together, these features make of GM12878 cells a good model for genomic studies. \section{Data} % number of peaks per dataset \begin{figure} \begin{center} \includegraphics[scale=0.3]{images/ch_encode_peaks/peaklist_peaknumber_GM12878.png} - \captionof{figure}{\textbf{Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000.} + \captionof{figure}{\textbf{Number of peaks in GM12878} called by ENCODE for each ChIP-seq experiment. 
The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed lines indicate 20'000 and 40'000 peaks respectively.} \label{encode_peaks_gm12878_peak_number} \end{center} \end{figure} % proportion of peaks with motif per dataset \begin{figure} \begin{center} \includegraphics[scale=0.3]{images/ch_encode_peaks/peaklist_proportions_GM12878.png} - \captionof{figure}{\textbf{Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep{mathelier_jaspar_2014}, HOCOMOCO v10 \citep{kulakovskiy_hocomoco:_2016} or Jolma \citep{jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.} + \captionof{figure}{\textbf{Proportion of peaks with a motif in GM12878}, for each ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM modeling the TF binding specificity. Each TF was associated to a log-odd PWM contained either from JASPAR Core vertebrate 2014 \citep{mathelier_jaspar_2014}, HOCOMOCO v10 \citep{kulakovskiy_hocomoco:_2016} or Jolma \citep{jolma_dna-binding_2013} collection. 
If a motif instance (with a score corresponding to a p-value higher or equal to $1\cdot10^{-4}$) could be found, the peak was considered as bearing a motif. The different TFs are colored by type, as defined by \citep{cheng_understanding_2012} : sequence specific TF (TFSS), chromatin structure (ChromStr) and others. The horizontal dashed line indicates 0.5.} \label{encode_peaks_gm12878_motif_prop} \end{center} \end{figure} -During its production phase in 2012, the ENCODE Consortium released ChIP-seq data 53 different TFs, nucleosome occupancy data (MNase-seq, \cite{kundaje_ubiquitous_2012}) and chromatin accessiblity data (DNase-seq, \citep{thurman_accessible_2012}) that were generated with a depth of coverage in GM12878 cells. The ENCODE Consortium also released ChIP-seq peaks called using a uniform processing pipeline \citep{gerstein_architecture_2012}. These peaks account for i) technical variability as they they are called from technical replicates and ii) inter peak caller discrepancies as several peak callers results were integrated together as part of the peak calling pipeline. These peaks are thus reproducible and robust to software related biases and can be considered as an excellent standard. +During its production phase in 2012, the ENCODE Consortium released ChIP-seq data for 53 different TFs, nucleosome occupancy data (MNase-seq, \cite{kundaje_ubiquitous_2012}) and chromatin accessibility data (DNase-seq, \citep{thurman_accessible_2012}) that were generated with a high depth of coverage in GM12878 cells. The ENCODE Consortium also released ChIP-seq peaks called using a uniform processing pipeline \citep{gerstein_architecture_2012}. These peaks account for i) technical variability as they are called from technical replicates and ii) inter peak caller discrepancies as the results of several peak callers were integrated together as part of the peak calling pipeline.
These peaks are thus reproducible and robust to software related biases and can be considered as an excellent standard. All data were taken from the MGA repository. The ChIP-seq peaks can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Uniform-TFBS/Uniform-TFBS.html}, the MNase-seq data at \url{https://ccg.epfl.ch/mga/hg19/encode/GSE35586/GSE35586.html} and the DNase-seq at \url{https://ccg.epfl.ch/mga/hg19/encode/UW-DNaseI-HS/UW-DNaseI-HS.html}. -The number of peaks called for each TF was highly variable and likely reflects each factor activity in this cell line (Figure \ref{encode_peaks_gm12878_peak_number}). The most abundant factor in terms of peaks was RUNX3 followed by CTCF. This observation fits to BioGPS \citep{wu_biogps:_2016} data which indicates that both RUNX3 and CTCF have a higher expression in lymphoblast and in B cells compared to other tissues. Moreover, the propensity of each TF to bind through their motifs was also variable, with again CTCF being showing the highest values \ref{encode_peaks_gm12878_motif_prop}. +The number of peaks called for each TF was highly variable and likely reflects each factor's activity in this cell line (Figure \ref{encode_peaks_gm12878_peak_number}). The most abundant factor in terms of peaks was RUNX3 followed by CTCF. This observation is consistent with BioGPS \citep{wu_biogps:_2016} data which indicates that both RUNX3 and CTCF have a higher expression in lymphoblast and in B cells compared to other tissues. Moreover, the propensity of each TF to bind through their motifs was also variable, with again CTCF showing the highest values (Figure \ref{encode_peaks_gm12878_motif_prop}). \section{ChIPPartitioning : an algorithm to identify chromatin architectures} \label{encode_peaks_chippartitioning} % Discovering archetypical chromatin architectures over a set of regions of interest - let's say containing a TF binding site in their middle - is a long standing problem in bioinformatics.
More formerly, given a matrix $R$ of dimensions $NxL$ containing $N$ vectors of read counts $r_{1}, r_{2}, ..., r_{N}$ of length $L$, each containing the number of reads mapping at a given position in a given region, find $K \leq N$ vectors of length $L' \leq L$ that contain archetypical signals found in the $N$ regions of $R$. This can actually be solved using clustering methods which groups regions that look alike into $K$ groups. The summary of the signal inside each group - for instance the mean signal for the K-means algorithm - can then be interpreted as the archetypical chromatin architectures. Biologically, different organization may reflect different functions. % First, the $N$ regions of interest are usually aligned with respect to a feature of interest, for instance a TF binding sites. However, he chromatin features of interest - for instance the nucleosomes - may not be aligned from one region to the next. This can originate because i) of the true binding sites being fuzzely distributed around the center of the regions, ii) the chromatin features appear at a varying distance from the region centers or iii) both. Comparing two regions then necessitate to first realign the chromatin features. Second, the regions can show a functional orientation. For instance, TF binding sites have an upstream and a downstream with respect to the bound sequence. Orienting properly the regions is also required to properly compare the chromatin organizations in two regions. Finally, the signal over some regions may be sparse because of a sub-optimal sequencing depth. % The study of signal distribution over genomic regions has been a quite active field for bulk sequencing experiments during the last decade. Dedicated algorithms \citep{hon_chromasig:_2008,nielsen_catchprofiles:_2012,kundaje_ubiquitous_2012,nair_probabilistic_2014,groux_spar-k:_2019} have been developed to cluster genomic regions based on their distribution of reads. 
% Most of these algorithms and softwares deal with some of these issues cited above. However, the algorithm developed by \citep{nair_probabilistic_2014} - which I will call ChIPPartitioning - is probably the best. ChIPPartitioning is a probabilistic partitioning method that softly clusters a sets of genomic regions based on their signal shape (as opposed to the absolute values) resemblance. To ensure proper comparisons between the regions, the algorithm allows to offset one region compare to the other to retrieve a similar signal at different offsets and to flip the signal orientation. Finally, it has been demonstrated to be really robust to sparse data. -As discussed in section \ref{intro_pattern_discovery}, pattern discovery is a long standing bioinformatic problem and several algorithms have been proposed to solve it. ChIPPartitioning \citep{nair_probabilistic_2014} is probably the best of them. It is a probabilistic partitioning algorithm that softly clusters a sets of genomic regions based on their signal shape (as opposed to the absolute values) resemblance. To ensure proper comparisons between the regions, the algorithm allows to offset one region compare to the other to retrieve a similar signal at different offsets and to flip the signal orientation. Finally, it has been demonstrated to be really robust to sparse data. +As discussed in section \ref{intro_pattern_discovery}, pattern discovery is a long standing bioinformatic problem and several algorithms have been proposed to solve it. ChIPPartitioning \citep{nair_probabilistic_2014} is probably the best of them. It is a probabilistic partitioning algorithm that softly clusters a set of genomic regions based on their signal shape (as opposed to the absolute values) resemblance. To ensure proper comparisons between the regions, the algorithm allows to offset one region compared to the other to retrieve a similar signal at different offsets and to flip the signal orientation.
Finally, it has been demonstrated to be really robust to sparse data. -ChIPPartitioning (a graphic representation of the algorithm can be found further below in Figure \ref{atac_seq_em}) models the signal over $N$ region of length $L$ as having being sampled from a mixture of $K$ different signal models (classes), using $L$ independent Poisson distributions for each region. The number of reads sequenced over this region is then the result of this sampling process. Each class model is represented by a vector $c_{k}$ of size $L' \leq L$ that represent the expected number of reads at each position for that class. These values are thus the Poisson distribution parameters. The number of reads $r_{i,j}$ at position $j$, in a region $i$ is : +ChIPPartitioning (a graphic representation of the algorithm can be found further below in Figure \ref{atac_seq_em}) models the signal over $N$ regions of length $L$ as having been sampled from a mixture of $K$ different read density models (classes), using $L$ independent Poisson distributions for each region. The number of reads sequenced over this region is then the result of this sampling process. Each class model is represented by a vector $C_{k}$ of size $L' \leq L$ that represents the expected number of reads at each position for class $k$. These values are thus the Poisson distribution parameters. The number of reads $r_{i,j}$ at position $j$, in a region $i$ is : \begin{equation} \label{encode_peaks_eq_em_data_model} r_{i,j} = \sum_{k=1}^{K} p_{k} \times X_{i,j,k} \end{equation} -where $p_{k}$ is the probability of the k-th class and $X_{i,j,k}$ the number of reads sampled from $Poisson(\lambda=c_{k,j})$. +where $p_{k}$ is the probability of the class $k$ and $X_{i,j,k}$ the number of reads sampled from $Poisson(\lambda=c_{k,j})$.
In order to discover the $K$ different class models - that are the chromatin signatures to find - in the data, the algorithm proceed to a maximum likelihood estimation of the Poisson distribution parameters $c_{1}, c_{2}, ..., c_{k}$ and the class probabilities $p_{1}, p_{2}, ..., p_{k}$ using an expectation-maximization (EM) framework. During the E-step, the likelihood $P(r_{i}|c_{k})$ of each region $i$, given each class $k$ and a posterior probability $P(c_{k}|r_{i})$ are computed. The posterior probabilities are interpreted as the probability that $r_{i}$ belongs to class $k$. Eventually, during the M-step, the class models $c_{1}, c_{2}, ..., c_{k}$ are updated using : \begin{equation} \label{encode_peaks_eq_em_update} c_{k,j} = \sum_{i=1}^{N} p_{k} \times r_{i,j} \end{equation} This procedure is actually a weighted and ungaped data alignment in which the posterior probabilities are the weights with the class models containing the average number of reads at each position of the alignment. Since each region is computed a probability to belong to each class, it participates to the update of all the class models, with different weights. -If the length of the chromatin signature searched $L' 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf{B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf{C} Same as in (B) but for TF binding sites that does not have their own motif. 
The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref{encode_peaks_methods_data}).} \label{encode_peaks_ctcf_association} \end{center} \end{figure} \begin{table} \begin{center} \begin{tabular}{ |c|c|c|l|l|c|c| } \hline \multicolumn{7}{|c|}{Curated associations} \\ \hline TF$_{A}$ & TF$_{B}$ & Motif ass. & Type & Binder & Reported & Validated \\ \hline CTCF & ATF2 & pos & indep.co-bind & & no & no \\ CTCF & EBF1 & pos & indep.co-bind & & yes & no \\ CTCF & MAZ & pos & indep.co-bind & & yes & no \\ CTCF & NFYb & pos & indep.co-bind & & yes & no \\ CTCF & NFkB & pos & indep.co-bind & & yes & no \\ CTCF & PAX5 & pos & indep.co-bind & & yes & no \\ CTCF & SP1 & pos & indep.co-bind & & yes & no \\ CTCF & BATF & neg & indir.co-bind & BATF & yes & no \\ CTCF & ELF1 & neg & indir.co-bind & ELF1 & yes & no \\ CTCF & IRF4 & neg & indir.co-bind & CTCF & yes & no \\ CTCF & MEF2a & neg & indir.co-bind & both & yes & no \\ CTCF & MEF2c & neg & indir.co-bind & both & yes & no \\ CTCF & NFATc & neg & indir.co-bind & CTCF & no & no \\ CTCF & NFYa & neg & indir.co-bind & CTCF & yes & no \\ CTCF & NRF1 & neg & indir.co-bind & CTCF & yes & no \\ CTCF & NRSF & neg & indir.co-bind & CTCF & yes & no \\ CTCF & PAX5 & neg & indir.co-bind & both & yes & no \\ CTCF & POU2f & neg & indir.co-bind & POU2f & yes & no \\ CTCF & RUNX3 & neg & indir.co-bind & both & no & no \\ CTCF & SRF & neg & indir.co-bind & CTCF & yes & no \\ CTCF & USF1 & neg & indir.co-bind & both & yes & no \\ CTCF & YY1 & neg & indir.co-bind & CTCF & yes & yes\\ CTCF & ZNF143 & neg & indir.co-bind & CTCF & yes & no \\ \hline JunD & BHLHE40 & neg & indir.co-bind & BHLHE40 & yes & no \\ JunD & CTCF & neg & indir.co-bind & CTCF & yes & no \\ JunD & EBF1 & neg & indir.co-bind & EBF1 & yes & no \\ JunD & EGR1 & neg & indir.co-bind & EGR1 & yes & yes\\ JunD & ELK1 & neg & unknown & & no & no \\ JunD & IRF4 & neg & indir.co-bind & JunD & yes & yes\\ JunD & 
MAZ & neg & indir.co-bind & MAZ & no & no \\ JunD & PAX5 & neg & indir.co-bind & PAX5 & yes & no \\ JunD & SP1 & neg & indir.co-bind & SP1 & yes & yes\\ JunD & USF2 & neg & indir.co-bind & USF2 & yes & no \\ JunD & YY1 & neg & indir.co-bind & & yes & yes\\ JunD & ZBTB33 & neg & unknown & & yes & no \\ \hline \end{tabular} \captionof{table} { \textbf{Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep{chatr-aryamontri_biogrid_2017}.} \label{encode_peaks_association_table} \end{center} \end{table} The study of co-binding with CTCF showed that it was possible to detect global associations. I already detected that the cohesin complex members SMC3 and RAD21 form a complex with CTCF, as expected from literature \citep{ghirlando_ctcf:_2016}. Additionally, I detected that YY1 and ZNF143 are also frequently associated with CTCF, which has also been reported \citep{ong_ctcf:_2014}. Thus, I decided to push forward in this direction. To this end, I set up a method based on motif co-occurrence to i) relieve the necessity of observing similar chromatin architectures, as in the previous section and ii) be able to functionally characterize the detected interactions. 
-As previously discussed (see section \ref{intro_tf_cobinding}), several types of functional interactions between two TFs $A$ and $B$ exist : direct co-binding, indirect co-binding, independent co-binding and interference. Because the binding mechanisms are different from each other, different observations are expected. In the case of direct co-binding, both TF motifs are expected to appear in close vicinity, more often than by chance. Moreover, a spatial constrain (both spacing and orientation) reflecting the complex structure is also expected to occur. In the case of indirect co-binding, if TF$_{A}$ is the factor binding its motif and TF$_{B}$ is the tethering factor, both motifs are expected to repel (avoid) each other at TF$_{A}$ binding sites. In the case of independent co-binding, both motif$_{A}$ and motif$_{B}$ are expected to be enriched at both TF$_{A}$ and TF$_{B}$ binding sites. However, no spatial constrain is expected between the motifs. Finally, in the case of interference, both motifs are expected to overlap. However, this may not be difficult to detect. +As previously discussed (see section \ref{intro_tf_cobinding}), several types of functional interactions between two TFs $A$ and $B$ exist : direct co-binding, indirect co-binding, independent co-binding and interference. Because the binding mechanisms are different from each other, different observations are expected. In the case of direct co-binding, both TF motifs are expected to appear in close vicinity, more often than by chance. Moreover, a spatial constrain (both spacing and orientation) reflecting the complex structure is also expected to occur. In the case of indirect co-binding, if TF$_{A}$ is the factor binding its motif and TF$_{B}$ is the tethering factor, both motifs are expected to repel (avoid) each other at TF$_{A}$ binding sites. In the case of independent co-binding, both motif$_{A}$ and motif$_{B}$ are expected to be enriched at both TF$_{A}$ and TF$_{B}$ binding sites. 
However, no spatial constrain is expected between the motifs. Finally, in the case of interference, both motifs are expected to overlap. However, this may be difficult to detect. % Several types of functional associations can occur between a TF$_{A}$ and a TF$_{B}$. Because each one of them brings different expected patterns in the data, it should be possible to detect and disentangle them. First two TFs can dimerize and bind to DNA using both DNA binding domains (DBDs) [REFERENCE NEEDED] (Figure \ref{encode_peaks_tf_association}A). I will refer to this case as \textbf{direct co-binding}. If this happens, both TF motifs are expected to appear in close vicinity, more often than by chance. Moreover, a spatial constrain (both spacing and orientation) reflecting the complex structure is also expected to occur. Second, two TFs can dimerize and bind to DNA using only one of the DBDs. This will result in having one of the TF bound to DNA while the other one will tether DNA through its interaction with the other TF (Figure \ref{encode_peaks_tf_association}B). This case will be referred to as \textbf{indirect co-binding}. In such a case, if TF$_{A}$ is the factor binding its motif and TF$_{B}$ is the tethering factor, both motifs are expected to repel (avoid) each other at TF$_{A}$ binding sites. Third, two TFs can both bind DNA using their own DBDs, in close vicinity but without any physical interaction (Figure \ref{encode_peaks_tf_association}C). In such as case, both motif$_{A}$ and motif$_{B}$ are expected to be enriched at both TF$_{A}$ and TF$_{B}$ binding sites. However, no spatial constrain is expected between the motifs. This case will be refered to as \textbf{independent co-binding}. This can be caused by a temporal relationship between both TFs where both TFs can bind to a given region asynchronously. 
For instance, a first TF is recruited to its binding site and ensures - somehow - a proper chromatin environment for another TF, such as illustrated during macrophage and B cells progenitors commitment \citep{heinz_simple_2010}. Finally, in case of a partial or total motif overlap, both TFs may be observed to be bound together (Figure \ref{encode_peaks_tf_association}D). In such a case, different phenomenons may explain this observation. A first possible explanation would be that two TFs compete to bind to the same region. Observing both TFs bound together could be due to an overlap of data from different cells in which only one TF is bound at the time. A second possible explanation would be that, for some reason, only one TF is bound, never the other. However, I prefer to be cautious regarding the causal mechanisms and this case will be referred to as an \textbf{interference}. -In order to collect more evidences about functional connections between TFs, I developed a simple analysis pipeline able to detect the expected patterns of motifs described above. Briefly, given a set of binding sites for a TF$_{A}$, it is possible to construct a contingency matrix containing the number of binding site with i) motif$_{A}$ and motif$_{B}$, ii) motif$_{A}$ only, iii) motif$_{B}$ only or iv) no motif and assess whether both motifs are associated or avoid each other using an exact Fisher test. Then, for pairs of motifs showing an association, displaying the spatial distribution of the motif may help to discriminate whether or not there is a spacing constrain or a motif overlap. +In order to collect more evidence about functional connections between TFs, I developed a simple analysis pipeline able to detect the expected patterns of motifs described above. 
Briefly, given a set of binding sites for a TF$_{A}$, it is possible to construct a contingency matrix containing the number of binding sites with i) motif$_{A}$ and motif$_{B}$, ii) motif$_{A}$ only, iii) motif$_{B}$ only or iv) no motif and assess whether both motifs are associated or avoid each other using an exact Fisher test. Then, for pairs of motifs showing an association, displaying the spatial distribution of the motif may help to discriminate whether or not there is a spacing constraint or a motif overlap. -I investigated the association of 47 TFs for which 53 datasetes were available in GM12878 cells with CTCF or JunD. CTCF was chosen because i) most of its binding sites have a short nucleosome depleted region and show only a peak of sequence conservation at the binding site leaving a restricted space for other motifs to co-occur (Figure \ref{suppl_encode_peaks_em_ctcf}) and ii) I already collected several observation regarding CTCF. JunD was chosen as a complementary example to CTCF in the sense that i) contrarily to CTCF, it is only a trancriptional regulator, ii) it is expected to bind to regulatory regions mostly thus to open chromatin regions where other motifs are expected to co-occur , iii) \url{~50}\% of the peaks have a motif versus \url{~80}\% to \url{~90}\% for CTCF peaklists (Figure \ref{encode_peaks_gm12878_motif_prop}). +I investigated the association of 47 TFs for which 53 datasets were available in GM12878 cells with CTCF or JunD. CTCF was chosen because i) most of its binding sites have a short nucleosome depleted region and show a sharp peak of sequence conservation at the binding site leaving a restricted space for other motifs to co-occur (Figure \ref{suppl_encode_peaks_em_ctcf}) and ii) I already collected several observations regarding CTCF.
JunD was chosen as a complementary example to CTCF in the sense that i) contrary to CTCF, it is only a transcriptional regulator, ii) it is expected to bind to regulatory regions mostly, thus to open chromatin regions where other motifs are expected to be present, iii) \url{~50}\% of the peaks have a motif versus \url{~80}\% to \url{~90}\% for CTCF peaklists (Figure \ref{encode_peaks_gm12878_motif_prop}). % motif co occurence -Motif co-occurrence analysis suggested several interactions. Regarding CTCF motif (Figure \ref{encode_peaks_ctcf_association}A), 8 positive motif association (ATF2, EBF1, MAZ, NFYb, NFkB, PAX5, SP1, YY1) and 16 negative motif associations (BATF, ELF1, IRF4, MEF2a, MEF2c, NFATc, NFYa, NRF1, NRSF/REST, PAX5, POU2F2/OCT2, RUNX3, SRF, USF1, YY1 and ZNF143) with other motifs were found. Regarding JunD (Figure \ref{suppl_encode_peaks_jund_association}A), positive motif association with 2 others TF motifs (BATF, cFos) and 12 negative associations with others TF motifs (ATF2, BHLHE40, CTCF, EBF1, EGR1, ELK1, IRF4, MAZ, PAX5, SP1, USF2, YY1 and ZBTB33) were found. cFos and one of the YY1-Sydh peaklists displayed evidences of poor quality (not shown and annotated as such by the ENCODE Consortium). Additionally, ATF2 is an AP1 member which possess a 2bp spacer (TGANNTCA) while JunD is a 1bp motif space (TGANTCA). Thus the strong negative interaction may simply be due to the fact that both motifs are simply mutually exclusive. In consequence, the positive associations CTCF-YY1 and JunD-cFos and the negative association JunD-ATF2 should be ignored. Additionally, JunD and BATF motifs are the same as both these TFs belong to the AP1 family. In consequence, it is impossible to say whether BATF peaks harbour a JunD or a BATF site. Thus this association should be ignored as well, leaving no positive association left with JunD motif. +The motif co-occurrence analysis suggested several interactions.
Regarding CTCF motif (Figure \ref{encode_peaks_ctcf_association}A), 8 positive motif association (ATF2, EBF1, MAZ, NFYb, NFkB, PAX5, SP1, YY1) and 16 negative motif associations (BATF, ELF1, IRF4, MEF2a, MEF2c, NFATc, NFYa, NRF1, NRSF/REST, PAX5, POU2F2/OCT2, RUNX3, SRF, USF1, YY1 and ZNF143) were found. Regarding JunD (Figure \ref{suppl_encode_peaks_jund_association}A), positive motif association with 2 others TF motifs (BATF, cFos) and 12 negative associations with others TF motifs (ATF2, BHLHE40, CTCF, EBF1, EGR1, ELK1, IRF4, MAZ, PAX5, SP1, USF2, YY1 and ZBTB33) were found. cFos and one of the YY1-Sydh peaklists displayed evidence of poor quality (not shown and annotated as such by the ENCODE Consortium). Additionally, ATF2 belongs to the members of the AP1 family that have a 2bp spacer (TGANNTCA) while JunD has a 1bp spacer (TGANTCA). Thus the strong negative interaction may simply be due to the fact that both motifs are simply mutually exclusive. In consequence, the positive associations CTCF-YY1 and JunD-cFos and the negative association JunD-ATF2 should be ignored. Additionally, JunD and BATF motifs are the same as both these TFs belong to the AP1 family. In consequence, it is impossible to say whether BATF peaks harbour a JunD or a BATF site. Thus this association should be ignored as well, leaving no positive association left with JunD motif. % densities -The analysis of CTCF and JunD motif occurrence densities (Figures \ref{encode_peaks_ctcf_association}B and C and Figure \ref{suppl_encode_peaks_jund_association}B and C) revealed further interesting details regarding possible association mechanisms. First, positive associations showed CTCF density patterns mostly compatible with the direct co-binding and the independent co-binding scenarios (see Figure \ref{encode_peaks_ctcf_association}B). However, making a clear distinction between both is often impossible. 
For instance, both EBF1 peaklists show a decrease in CTCF motif density \url{~10}bp after the peak followed by an increase which could represent the spacer between CTCF and EBF1. However this is followed by a rather wide CTCF motif presence, mostly suggesting an independent co-binding scenario. An interesting candidate for a direct co-binding with CTCF is RXRa (Figure \ref{encode_peaks_ctcf_association}B). Even though the motif association was not significant, a focused co-localization of both motif appears. Second, negative associations showed CTCF and JunD density patterns compatible with the indirect co-binding scenario where the TFs would tether through CTFC or JunD, i.e. the CTCF or JunD motifs do not show a spacing constrain with the binding sites but are rather spread over ~100bp around binding sites without their own motif (Figure \ref{encode_peaks_ctcf_association}C and Figure \ref{suppl_encode_peaks_jund_association}C). Interestingly, CTCF motif around YY1 and ZNF143 binding sites lacking their own motifs (see bottom of Figure \ref{encode_peaks_ctcf_association}C) showed really focused densities, indicating that for some reason, the CTCF motif is well localized. Even if unexpected, this observation is not incompatible with the indirect co-binding scenario and further supports the results from section \ref{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}. +The analysis of CTCF and JunD motif occurrence densities (Figures \ref{encode_peaks_ctcf_association}B and C and Figure \ref{suppl_encode_peaks_jund_association}B and C) revealed further interesting details regarding possible association mechanisms. First, positive associations showed CTCF density patterns mostly compatible with the direct co-binding and the independent co-binding scenarios (see Figure \ref{encode_peaks_ctcf_association}B). However, making a clear distinction between both is often really difficult. 
For instance, both EBF1 peaklists showed a decrease in CTCF motif density \url{~10}bp after the peak, followed by an increase which could represent the spacer between CTCF and EBF1. However this is followed by a rather wide CTCF motif presence, mostly suggesting an independent co-binding scenario. An interesting candidate for a direct co-binding with CTCF is RXRa (Figure \ref{encode_peaks_ctcf_association}B). Even though the motif association was not significant, a focused co-localization of both motifs appears. Second, negative associations showed CTCF and JunD density patterns compatible with the indirect co-binding scenario where the TFs would tether through CTCF or JunD, i.e. the CTCF or JunD motifs do not show a spacing constraint with the binding sites but are rather spread over \url{~100}bp around binding sites without their own motif (Figure \ref{encode_peaks_ctcf_association}C and Figure \ref{suppl_encode_peaks_jund_association}C). Interestingly, CTCF motif around YY1 and ZNF143 binding sites lacking their own motifs (see bottom of Figure \ref{encode_peaks_ctcf_association}C) showed really focused densities, indicating that for some reason, the CTCF motif is well localized. Even if unexpected, this observation is not incompatible with the indirect co-binding scenario and further supports the results from section \ref{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}. % results -To summarize, the motif association statistics allowed me to identify 35 associations of TFs with either CTCF or JunD (Table \ref{encode_peaks_association_table}). The strongest negative interactions for CTCF were ZNF143 and YY1, supporting the results found in the previous sections. The analysis of CTCF and JunD motif spatial distributions around peaks and a closer examination of the contingency matrices allowed to suggest details about the interacting mechanisms, including which TF binds DNA. 
The only two exceptions were JunD-ELK1 and JunD-ZBTB33 for which the motif occurrence densities were uninformative. Finally, out of these 35 associations, 5 were supported by experimental evidences and 5 were not already reported in previous studies or databases \citep{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012, chatr-aryamontri_biogrid_2017}. +To summarize, the motif association statistics allowed me to identify 35 associations of TFs with either CTCF or JunD (Table \ref{encode_peaks_association_table}). The strongest negative interactions for CTCF were ZNF143 and YY1, supporting the results found in the previous sections. The analysis of CTCF and JunD motif spatial distributions around peaks and a closer examination of the contingency matrices allowed me to suggest details about the interacting mechanisms, including which TF binds DNA. The only two exceptions were JunD-ELK1 and JunD-ZBTB33 for which the motif occurrence densities were uninformative. Finally, out of these 35 associations, 5 were supported by experimental evidence and 5 had not yet been reported in previous studies or databases \citep{wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012, chatr-aryamontri_biogrid_2017}. \section{EBF1 binds nucleosomes} \begin{figure} \begin{center} \includegraphics[scale=0.4]{images/ch_encode_peaks/ebf1_haib_1.png} \captionof{figure}{\textbf{EBF1 binding sites} stand on the edge of a nucleosome. \textbf{A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and ii) MNase-seq data released by Gaffney et al. (in blue) \citep{gaffney_controls_2012}. \textbf{B} Dinucleotide frequencies around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. 
\textbf{C} Motif frequency around the nucleosome dyads from the Gaffney dataset that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.} \label{encode_peaks_ebf1} \end{center} \end{figure} % As presented above (section \ref{encode_peaks_chippartitioning}), EBF1 binding sites does not seem to present a NDR seem to be covered by a nucleosome array. This observation suggest that EBF1 can bind to nucleosomal DNA. However, because ChIPPartitioning realigns the data, one possible explanation is that it failed to properly aligned the data and that the results do not reflect reality. % In order to clarify this, I looked at the MNase digestion profile - more specifically, at the distribution of nucleosome dyads - at EBF1 binding sites. -EBF1 is a crucial factor for B cell development. It is necessary in the early steps, for a proper lineage commitment as well as later on during the entire B cell development \citep{boller_defining_2018}. Since many years, EBF1 has been though to be able to "pioneer early changes in the target gene chromatin necessary for transcriptional activation" and proper B cell development \citep{hagman_early_2005}. Experimental evidences supported that EBF1 could be able to bind compacted naive chromatin (without noticeable mark/modification), leading to a local chromatin opening, H3K4me2 deposition, DNA demethylation and gene activation \citep{maier_early_2004,boller_pioneering_2016}. If such features makes a lot of sense during lineage commitment, the some underlying mechanisms remained mysterious, especially how EBF1 primarily binds to closed chromatin. With regard to this, the results of section \ref{encode_peaks_chippartitioning}, suggesting that EBF1 binding sites may be covered by nucleosome arrays, rose my attention. In order to collect evidences that may shed light on this, I conducted a deeper exploration of the EBF1 binding sites. 
+EBF1 is a crucial factor for B cell development. It is necessary in the early steps, for a proper lineage commitment as well as later on during the entire B cell development \citep{boller_defining_2018}. Since many years, EBF1 has been though to be able to "pioneer early changes in the target gene chromatin necessary for transcriptional activation" and proper B cell development \citep{hagman_early_2005}. Experimental evidence supported that EBF1 could be able to bind compacted naive chromatin (without noticeable mark/modification), leading to a local chromatin opening, H3K4me2 deposition, DNA demethylation and gene activation \citep{maier_early_2004,boller_pioneering_2016}. If such features makes a lot of sense during lineage commitment, the some underlying mechanisms remained mysterious, especially how EBF1 primarily binds to closed chromatin. With regard to this, the results of section \ref{encode_peaks_chippartitioning}, suggesting that EBF1 binding sites may be covered by nucleosome arrays, rose my attention. In order to collect evidence that may shed light on this, I conducted a deeper exploration of the EBF1 binding sites. -First, the distribution of nucleosome dyads - from two independent experiments - around EBF1 binding sites revealed a landscape that is compatible with a nucleosome positioned ~70bp apart from the binding sites (Figures \ref{encode_peaks_ebf1}A). This configuration would position the EBF1 binding site at the edge of the nucleosome. The 10bp periodicity visible suggested that other positioning of the EBF1 binding site exist but always at integer numbers of helix turn, such that the EBF1 binding site would always be positioned the same compared to the nucleosome surface. Surprisingly, the distribution of EBF1 motif remained the same, whether the nucleosome was containing an EBF1 bound site or not (Figure \ref{suppl_encode_peaks_ebf1_nucl}). 
+First, the distribution of nucleosome dyads - from two independent experiments - around EBF1 binding sites revealed a landscape that is compatible with a nucleosome positioned ~70bp apart from the binding sites (Figures \ref{encode_peaks_ebf1}A). This configuration would position the EBF1 binding sites at the edge of the nucleosome. The 10bp periodicity visible suggested that other positioning of the EBF1 binding site exist but always at integer numbers of helix turn, such that the EBF1 binding site would always be positioned the same compared to the nucleosome surface. Surprisingly, the distribution of EBF1 motif remained the same, whether the nucleosome was containing an EBF1 bound site or not (Figure \ref{suppl_encode_peaks_ebf1_nucl}). Second, to support the fact that these EBF1 binding sites are indeed functional sites, I compared some of their chromatin features with the entire nucleosome pool. As expected, the presence of EBF1 binding sites was correlated with an increased accessibility (Figure \ref{suppl_encode_peaks_ebf1_chrom}A), even though the opening was spread rather than narrow. Furthermore, this increased opening was concomitant with an enriched H3K4me2 deposition (Figure \ref{suppl_encode_peaks_ebf1_chrom}B), in line with the literature. Last, it was also possible to highlight a higher sequence conservation at the nucleosome edges when they had an EBF1 binding site (Figure \ref{suppl_encode_peaks_ebf1_chrom}C), suggesting a functional difference between both nucleosome pools. % Finally, Trifonov's motif appeared along the nucleosome, EBF1 motif was rather present at the nucleosome edges. A closer look both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif ({A/C}CCC{A/C} and {A/G}GGG{A/G}) at the cost of 2 or 0 missmatches. 
% A further inspection of the dinucleotide base composition in the nucleosome bearing an EBF1 binding site revealed a periodic pattern that is compatible with a rotationally positioned nucleosome (Figure \ref{encode_peaks_ebf1}B), as expected from literature in \citep{ioshikhes_variety_2011,gaffney_controls_2012}. % Finally, the occurrence of the nucleosome positioning motif - YRRRRRYYYYYR where Y is C/T and R is A/G - identified by Trifonov \citep{trifonov_cracking_2011} in these nucleosomes is antiphased with the occurrence of the EBF1 motif. If Trifonov's motif appeared along the nucleosome, EBF1 motif was rather present at the nucleosome edges. A closer look both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif ({A/C}CCC{A/C} and {A/G}GGG{A/G}) at the cost of 2 or 0 missmatches. % These results suggest that EBF1 can bind nucleosomal DNA. In most cases, it seems that the EBF1 binding site is located at its edge. Incidentally, the high similarity between Trifonov and EBF1 motifs suggest that EBF1 binding sequence may have a nucleosome positioning property. Interestingly, EBF1 motif, as identified by JASPAR \ref{suppl_encode_peaks_ebf1_logo}, is 14bp wide. Consequently, it is conceivable that, wherever this motif is located along the nucleosome, at least part of remains facing outward and is thus "readable". % Based on this observation, I hypothesize that EBF1 may be a pioneering factor or that it influence nucleosomes positioning through its binding. In the first case, EBF1 would be able to target yet inaccessible loci upon the right cellular conditions. In the second case, EBF1 would rather serve to both open and close targeted sites by leading - directly or indirectly - to the positing of a nucleosome right beside of it binding site. Both scenarios make sense. Indeed, EBF1 is known to be crucial for B-cells commitment. 
In such developmental processes, specific enhancers are made accessible and active at different, in a coordinated manner, during the developmental process. (AND WHAT ABOUT CLOSING???) Third, a further inspection of the sequence composition of the nucleosomes bearing an EBF1 binding site revealed i) a periodic occurrence of antiphased WW (W=A/T) and SS (S=C/G) dinucleotides and ii) a periodic occurrence of the YRRRRRYYYYYR (R=A/G, Y=C/T) nucleosome positioning motif described by Trifonov \citep{trifonov_cracking_2011}. Together, these observations suggest that EBF1 binding sites are located on the edge of a rotationally positioned nucleosome \citep{ioshikhes_variety_2011,trifonov_cracking_2011,gaffney_controls_2012}. Interestingly, Trifonov's motif appeared in counter phased with EBF1 motif. A closer look both motifs (see Figure \ref{suppl_encode_peaks_ebf1_logo} for EBF1 logo) revealed that half of Trifonov's motif (RRRRR or YYYYY) matches one half of the EBF1 motif (\{A/C\}CCC\{A/C\} or \{A/G\}GGG\{A/G\}) at the cost of 2 or 0 missmatches. -These results suggest that EBF1 can indeed bind nucleosomal DNA. The motif bound were predominantly located at the edges of the nucleosomes. Yet, this was also the fact for nucleosome that do are not bind by EBF1. This suggests that nucleosomes are already in this position before EBF1 binding, which may be the case given the presence of favorable nucleosome positioning sequences. +These results suggest that EBF1 can indeed bind nucleosomal DNA. The motifs bound were predominantly located at the edges of the nucleosomes. Yet, this was also the fact for nucleosome that are not bound by EBF1. This suggests that nucleosomes are already in this position before EBF1 binding, which may be the case given the presence of favorable nucleosome positioning sequences. -The reason why the EBF1 motif is already on the edges of nucleosome, even without EBF1 binding, remains unknown. 
One explanation could be that such sites have a double function. The first function would be to recruit EBF1 to open up the region. The second, would be that EBF1 binding sequence (together with other positioning sequences) can act as a barrier - a potential well - avoiding the nucleosome to roll over in this direction. Such a system would have the advantage of promoting a suited chromatin structure in developmentally important regions. Constraining nucleosome movement would could serve to hide regulatory elements. At the same time, these regions would remain responsive to differentiation signals through the exposition of EBF1 sites on the periphery of nucleosomes. +The reason why the EBF1 motif is already on the edges of nucleosome, even without EBF1 binding, remains unknown. One explanation could be that such sites have a double function. The first function would be to recruit EBF1 to open up the region. The second, would be that EBF1 binding sequence (together with other positioning sequences) can act as a barrier - a potential well - avoiding the nucleosome to roll over in this direction. Such a system would have the advantage of promoting a suited chromatin structure in developmentally important regions. Constraining nucleosome movement could serve to hide regulatory elements. At the same time, these regions would remain responsive to differentiation signals through the exposition of EBF1 sites on the periphery of nucleosomes. \section{Discussion} -Overall, the results presented in this section overall complement and support the observations made by other research groups worldwide. +Overall, the results presented in this section complement and support the observations made by other research groups worldwide. % nucleosome arrays and NDR The systematic study of the nucleosome landscape in the viccinity of TFs binding sites highlighted that nucleosome arrays are always present on the flanking regions. 
However, all the TFs, with the exception of CTCF, do not act as a barrier and thus are not major determinant of the chromatin architecture. Instead, an alternative mechanism, probably involving chromatin remodelers, is likely to be responsible. Furthermore, all TFs were found to bind in NDRs with the noticeable exception of EBF1. % EBF1 -Surprisingly, a large fraction of EBF1 binding sites was found to be occupied by what seemed to be a rotationally positioned nucleosome which edges are bound by EBF1. Furthermore, it appeared that EBF1 binding motif resembles a nucleosome positioning sequence and could be involved in the positioning of the nucleosome. However, at least two alternative scenarios could explain the presence of an EBF1 binding site at the entry of a nucleosome. First, EBF1 genuinely binds to such "pre-positioned" nucleosomes, in which case I am observing EBF1 true binding mechanism. Alternatively, EBF1 binding - to either nucleosomal or naked DNA - results in the positioning of a nucleosome right beside. To my opinion, the previous results suggesting a pioneer function for EBF1 \citep{boller_pioneering_2016} makes the second hypothesis more likely. EBF1 would directly engage a nucleosome and somehow trigger its displacement such that EBF1 binding site will eventually reside at the nucleosome edge. Testing this hypothesis could be performed by assaying in vitro binding of EBF1 to assembled nucleosome arrays. +Surprisingly, a large fraction of EBF1 binding sites was found to be occupied by what seemed to be a rotationally positioned nucleosome which edges were bound by EBF1. Furthermore, it appeared that EBF1 binding motif resembles a nucleosome positioning sequence and could be involved in the positioning of the nucleosome. However, at least two alternative scenarios could explain the presence of an EBF1 binding site at the entry of a nucleosome. 
First, EBF1 genuinely binds to such "pre-positioned" nucleosomes, in which case I am observing EBF1 true binding mechanism. Alternatively, EBF1 binding - to either nucleosomal or naked DNA - results in the positioning of a nucleosome right beside. To my opinion, the previous results suggesting a pioneer function for EBF1 \citep{boller_pioneering_2016} makes the second hypothesis more likely. EBF1 would directly engage a nucleosome and somehow trigger its displacement such that EBF1 binding site will eventually reside at the nucleosome edge. Testing this hypothesis could be performed by assaying in vitro binding of EBF1 to assembled nucleosome arrays. % CTCF -The study of CTCF binding sites revealed that they can be grouped in i) promoter distal and ii) promoter proximal binding sites. In each of the subset, CTCF was observed to bind with a different group of interactors, suggesting different functions. At promoter distal binding sites CTCF is associated the cohesin complex while at promoter proximal regions, CTCF seems to be associated with ZNF143 and YY1. +The study of CTCF binding sites revealed that they can be grouped in i) promoter distal and ii) promoter proximal binding sites. In each of the subsets, CTCF was observed to bind with a different group of interactors, suggesting different functions. At promoter distal binding sites CTCF is associated the cohesin complex while at promoter proximal regions, CTCF seems to be associated with ZNF143 and YY1. % interaction Finally the study of the motif co-localization, even if simple, seemed quite powerful as it allowed to identify 35 interactions with CTCF or junD. Out of these, 25 have already been proposed but without experimental support, 5 have been proposed and experimentally validated and 5 were new. These 5 new interactions are proposed to be indirect co-binding event and thus imply a physical interaction that can be tested. 
\section{Methods} \subsection{Data and data processing} \label{encode_peaks_methods_data} All the GM12878 ENCODE data used were mapped against hg19 genome and can be found on the MGA repository \citep{dreos_mga_2018}. Peaks called by the ENCODE Consortium using their uniform processing pipeline \cite{gerstein_architecture_2012} were used. These peaks can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Uniform-TFBS/Uniform-TFBS.html}. Assuming that a TF binds to DNA through motif recognition, the peak center should be localized on the motif center. Thus the center of each peak was moved to the closest motif instance within 60bp. To do so, each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \cite{mathelier_jaspar_2014}, HOCOMOCO v10 \cite{kulakovskiy_hocomoco:_2016} or Jolma \cite{jolma_dna-binding_2013} collection. Using the corresponding log-odd PWM, peak sequences were scanned to find motif instance with a score corresponding to a pvalue higher or equal to 1e-4. If such a motif instance was found, the peak position was shifted to the center of the motif instance and mapped to the corresponding strand. Otherwise, the peak position remained unchanged without strand information. In GM12878 cells, nucleosome occupancy was assessed using MNase-seq data released by the ENCODE Consortium (GSE35586). These data can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/GSE35586/GSE35586.html}. To increase sequencing depth, all replicates available for this cell line were pooled together, resulting in ~789 mio reads, and used as a single dataset. The resulting dataset is available and has the description "GM12878|Nucleosome|all (SLOW!)". Because each read was represented as a single point coordinate corresponding to their 5' edges, these coordinates were centered by 70bp in order to indicate the nucleosome dyads. Finally, another dataset was used for one analysis only. 
These data were released by Gaffney and colleagues \cite{gaffney_controls_2012} and can be found at \url{https://ccg.epfl.ch/mga/hg19/gaffney12/gaffney12.html} and were not centered as the coordinates already represent the center of paired-end sequenced fragments. The dataset is labeled "All Paired-end samples - 147bp fragments". Chromatin accessibility was assessed using DNaseI-seq data released by the ENCODE Consortium \cite{boyle_high-resolution_2008} (GSE32970). To increase sequencing depth, all replicates available for GM12878 cells were pooled together, resulting in \url{~144} mio reads, and used as a single dataset. The individual replicates can be found at \url{https://ccg.epfl.ch/mga/hg19/encode/Duke-DNaseI-HS/Duke-DNaseI-HS.html}. The reads were represented as a single point coordinate corresponding to their 5' edges but were not centered as this corresponds to the exact DNaseI nick location. The EPDnew release 003 was used as TSS annotation \cite{dreos_eukaryotic_2017} and genome sequence conservation was assessed using Phastcons \cite{siepel_evolutionarily_2005}. Both datasets can be found at \url{https://ccg.epfl.ch/mga/hg19/epd/epd.html} and \url{https://ccg.epfl.ch/mga/hg19/phastcons/phastcons.html} respectively. \subsection{Classification of MNase patterns} \label{encode_peaks_em_mnase} For each TF peaklist, MNase, DNase, sequence conservation and TSS density around TF binding sites were assessed independently by counting the number of reads mapped from -999bp to +1000bp around each peak, using 10bp bins. For each TF, 4 matrices having one row per binding site (peak) and 199 columns were created using the ChIP-extract program \citep{ambrosini_chip-seq_2016}. Probabilistic pattern classification was achieved using ChIPPartitioning (see section \ref{encode_peaks_chippartitioning}). The algorithm was implemented as described in the supplemental materials of \cite{nair_probabilistic_2014}. Two different procedures were used to classify MNase patterns. 
Both were run for 10 iterations allowing flip and a value of shift of 15 bins. The first procedure aimed to discover 4 different pattern classes, allowing flip and a shift of 15 bins. The procedure was initialized with 4 classes. The class patterns were initialized by assigning each peak a random probability to belong to each of the 4 classes. The patterns were then computed as the weighted average of the signal given the peak class probabilities as weights. Then the prior class probabilities were initialized as $p_{k,s,f} = 1/(K \times S \times 2)$ where $k$ is the class index, $s$ is the shift state index, $f$ is an indicator variable for the flip state (1 for "normal", 2 for "reverse"), $K$ is the number of classes (here 4) and $S$ is the maximum allowed shift in bins (here 15). The classification was run for 10 iterations. At the end, it returned a matrix of dimensions $N \times K \times S \times 2$ containing the probabilities for each of the $N$ regions to belong to each of the $K$ classes, for each of the $S$ possible shift states and for both flip states ("normal" or "reverse"). The second procedure aimed to discriminate between 2 classes : i) the binding sites describing the "average" binding sites as opposed to ii) those differing from this. To do so, class patterns were initialized to i) the aggregation over all peaks (the average pattern) and ii) a flat pattern being the mean number of counts of the input matrix. Flip and 15 bins of shift were allowed. The prior class probabilities were initialized as $p_{k,s,f} = \mathcal{N}(s, \lfloor S/2 \rfloor + 1, 1)$ where the second and third parameters are the mean and the standard deviation, giving a higher prior probability to states with shift equal to 0bp. \subsection{Quantifying nucleosome array intensity from classification results} Nucleosome array intensity was quantified using a method developed by Zhang and colleagues \citep{zhang_canonical_2014}. 
Briefly, nucleosome signal is represented in 2 dimensions as a set of signal intensities for a given set of positions. Data are structured as a vector $Y$ containing the nucleosome occupancy signal (for instance an EM classification class profile) for $n$ bins (for EM class profiles, 199 bins of 10bp). First, the 1$^{st}$ order derivative $D_{1}$ of $Y$ is computed. Then the 1$^{st}$ order derivative $D_{2}$ of the absolute value of $D_{1}$ is computed. Local maxima in $D_{2}$ are searched using a window of 15 bins (corresponding to 150bp, a nucleosome width). Maxima can be interpreted as a strong drop or enrichment of signal, corresponding to a pattern expected from a well positioned nucleosome array. Finally, all $D_{2}$ maxima are joined by a line and the nucleosome array intensity at each given position is the height of the line at this position. The nucleosome array intensity for the first and last positions of $Y$ was set to 0. The average nucleosome array intensity of $Y$ was used as the nucleosome array value of the input data. The classification of a matrix of counts having $N$ rows (regions), with $K$ classes, allowing a maximum of $S$ shift states and two flip states ("normal" and "reverse") outputs a probability matrix $P$ of dimension [$N$, $K$, $S$, 2] containing the probability for each region to belong to each class, given a shift state and a flip state. This matrix can be used to compute a vector $D_{k}$ of length $S$ containing the probability density of the shift states for a class $k$ using : \begin{equation} \begin{aligned} D_{k,s} & = \frac {\sum_{i=1}^{N} (P_{i,k,s,1} + P_{i,k,s',2})} {\sum_{i=1}^{N} \sum_{s=1}^{S} (P_{i,k,s,1} + P_{i,k,s',2})} \\ \text{with } \\ s' & = S - s + 1 \end{aligned} \label{encode_peaks_equation_shift_density1} \end{equation} \citep{ambrosini_chip-seq_2016} where $s'$ represents the index of the reverse orientation and with the constraint that all the elements of $P$ sum to 1. 
Given the shift probability density vector $D_{k}$ of one class, computing its standard deviation was done using : \begin{equation} \begin{aligned} \sigma_{k} & = \sqrt { \sum_{i=1}^{S} (X_{i}^{2} \cdot D_{k,i}) - \mu_{k}^{2} }\\ \text{with } \\ \mu_{k} & = \sum_{i=1}^{S} (X_{i} \cdot D_{k,i}) \end{aligned} \label{encode_peaks_equation_shift_density2} \end{equation} where $X$ is a vector containing the position changes in bp for every shift state, e.g. for a maximum number of shift states of 15 ($S=15$) with bins of 10bp, $X$ would contain [-70, -60, ..., 0, ..., +60, +70]. \subsection{Peak colocalization} To measure the extent of colocalization between CTCF, YY1, ZNF143, SMC3 and RAD21, the occurrence of YY1, ZNF143, SMC3 and RAD21 peaks around CTCF peaks was computed using ChIP-extract \citep{ambrosini_chip-seq_2016}. The CTCF peak list used as reference was "wgEncodeAwgTfbsSydhGm12878Ctcfsc15914sc20UniPk" because it was the CTCF peak list containing i) the most CTCF peaks and ii) the highest proportion of peaks with a motif. ChIP-extract was run separately for YY1, ZNF143, SMC3 and RAD21 using the following parameters : from -99, to 100, window size 1. Then, the proportion of CTCF peaks having at least one other peak within +/-10bp, 50bp or 100bp was computed. \subsection{NDR detection} Let us consider a matrix of MNase-seq counts $R$ of dimensions $N \times L$ containing $N$ vectors of read counts $r_{1}, r_{2}, ..., r_{N}$ of length $L$. Because MNase-seq reads are a direct indication of the nucleosome occupancy, detecting NDRs is about finding low signal regions, flanked by two high signal regions. The signal in each vector $r_{i}$ (region) is assumed to have been sampled from a 2-class mixture of high (nucleosome) and low (nucleosome-free) signal, using a Poisson distribution. Both classes are expected to occur with a given probability $p^{nucl}_{i}$ and $p^{free}_{i}$. 
The rows are considered individually to lessen technical biases such as region-specific sequencing depth. The class probabilities and their mean parameters are estimated using an EM algorithm. First, during the E-step, for each position inside a region, the posterior probability of the nucleosome class given the data is computed using : \begin{equation} \begin{aligned} P(nucl | r_{i,l}) = \frac{p_{i}^{nucl} \times Poisson(r_{i,l}, \lambda=m_{i}^{nucl})} {p_{i}^{nucl} \times Poisson(r_{i,l}, \lambda=m_{i}^{nucl}) + p_{i}^{free} \times Poisson(r_{i,l}, \lambda=m_{i}^{free})} \end{aligned} \end{equation} where $r_{i,l}$ is the number of reads at position $l$ in the $i$-th row of $R$, $m_{i}^{nucl}$ and $m_{i}^{free}$ are the mean parameters of the nucleosome and nucleosome-free classes respectively. Obviously, the nucleosome-free class posterior probability is \begin{equation} \begin{aligned} P(free | r_{i,l}) = 1 - P(nucl | r_{i,l}) \end{aligned} \end{equation} Then, during the M-step, the class mean parameters are updated as the posterior-weighted averages of the read counts using \begin{equation} \begin{aligned} m_{i}^{nucl} = & \frac{\sum_{l=1}^{L} r_{i,l} \times P(nucl | r_{i,l})} {\sum_{l=1}^{L} P(nucl | r_{i,l})} \\ m_{i}^{free} = & \frac{\sum_{l=1}^{L} r_{i,l} \times P(free | r_{i,l})} {\sum_{l=1}^{L} P(free | r_{i,l})} \end{aligned} \end{equation} and the class probabilities : \begin{equation} \begin{aligned} p_{i}^{nucl} = & \frac{1} {L} \times \sum_{l=1}^{L} P(nucl | r_{i,l}) \\ p_{i}^{free} = & 1 - p_{i}^{nucl} \end{aligned} \end{equation} The EM optimization of the parameter estimates was repeated for 10 iterations. At the end of the parameter estimation process, each of the $L$ positions in a region $r_{i}$ was assigned two posterior probabilities $P(nucl | r_{i,l})$ and $P(free | r_{i,l})$ to belong to each class. In all cases, the nucleosome class was the class having the highest mean parameter and the nucleosome-free class the class with the smallest ($m_{i}^{nucl} > m_{i}^{free}$). 
The binding sites - located in the center of the regions, at position $s = L/2$ - were assumed to be within the NDR. From that point, the NDR was extended using the following procedure : \SetKwProg{Fn}{}{\{}{}\SetKwFunction{Function}{float NDRextend}% \begin{algorithm}[H] \label{encode_peaks_algo_ndr_extend} \Fn{\Function{}} { \KwData{The posterior probabilities obtained for each position of $r_{i}$.} \KwResult{The left and right coordinates of the NDR} \tcp{NDR only covers the central location} $left = s$ \; $right = s$ \; \While{$left \ne 2$ and $right \ne L-1$} { $p.free.l = P(free|r_{i,left})$ \; $p.free.r = P(free|r_{i,right})$ \; $p.nucl.l = P(nucl|r_{i,left})$ \; $p.nucl.r = P(nucl|r_{i,right})$ \; \tcp{bidirectional extension} \If{$p.free.l > p.nucl.l$ and $p.free.r > p.nucl.r$} { $left \minuseq 1$ \; $right \pluseq 1$ \; } \tcp{extension to left} \ElseIf{$p.free.l > p.nucl.l$} { $left \minuseq 1$ \; } \tcp{extension to right} \ElseIf{$p.free.r > p.nucl.r$} { $right \pluseq 1$ \; } \tcp{no more extension possible} \Else { break \; } } \Return{$left$, $right$} } \caption{Searches the coordinates of the NDR using the posterior nucleosome and nucleosome free class probabilities, for a region $R_i$, from its central position.} \end{algorithm} The nucleosome occupancy around CTCF binding sites was measured using ChIP-extract with "wgEncodeAwgTfbsSydhGm12878Ctcfsc15914sc20UniPk" peak list as reference - because it was the CTCF peak list with the most peaks and with the highest proportion of peaks with a CTCF motif -, the ENCODE MNase-seq data described in section \ref{encode_peaks_methods_data} as targets and the following parameters : from -999bp, to 1000bp and window size 10bp. This matrix was subjected to a ChIPPartitioning partitioning, as described in section \ref{encode_peaks_em_mnase}, to find 4 nucleosome architectures, using shifting and flipping. The resulting posterior probabilities were used to re-orient the data. 
If the major flip state - that is the flip state with the highest overall probability - for a given region was the "reverse" state, then the row was reversed. The re-oriented matrix was then subjected to the NDR detection. The re-orientation was done for aesthetic purposes only. Because the NDR detection was performed starting from the center position in each region - and given that reversing a vector did not change its central position - this operation had no influence on the NDR detection. \subsection{CTCF and JunD interactors} % Enumerating motif instances genome-wide To enumerate instances of the CTCF and JunD motifs, the hg19 genome assembly was scanned using CTCF (MA0139.1 from JASPAR Core Vertebrate 2014 \citep{mathelier_jaspar_2014}) and JunD (JUND\_HUMAN.H10MO.A from HOCOMOCOv10 \citep{kulakovskiy_hocomoco:_2016}) matrices to produce lists of potential binding sites. A limit score threshold was set as the score corresponding to a p-value of 1e-5 for each matrix, respectively. This was done using the matrix\_scan program from PWMScan \citep{ambrosini_pwmscan:_2018}. Finally, any motif instance falling inside a region classified as being a repeated element and blacklisted by the ENCODE Consortium was filtered out using the count\_filter program from the ChIP-seq tools \citep{ambrosini_chip-seq_2016-1}. % Measuring motif instance occurrence near peaks Then, for each TF peak list independently, the number of i) the TF and ii) CTCF/JunD instances +/- 1kb of each peak was measured, in bins of 1bp, using the ChIP-extract program from the ChIP-seq tools \citep{ambrosini_chip-seq_2016-1}. The associations were measured as follows : using the ChIP-extract results for the given peak list versus i) the TF and ii) CTCF/JunD motif instances, the numbers of peaks having i) at least one TF and one CTCF/JunD motif instance, ii) only TF motif instances, iii) only CTCF/JunD motif instances or iv) no motif instance were counted. 
These numbers were used to build a contingency table and a two-sided Fisher exact test for association was performed. The motif relationship was considered a significant motif association if the test OR was bigger than 1 and the 95\% CI of the OR did not contain 1, or a significant motif exclusion if the OR was smaller than 1 and the 95\% CI of the OR did not contain 1. % Motif density around peaks The motif occurrence densities were computed from the ChIP-extract result matrices. Out of each matrix, a vector containing the number of motif instances at each possible absolute distance was computed. This was done as follows : first, the neighbours of each non-null cell were incremented (+/- 5 columns on each side) to turn motif instance hits into a non point-like representation. A given cell value could be incremented several times. Second, for each row, the columns corresponding to the same absolute distance from the peak were summed together (e.g. +1bp with -1bp, +2bp with -2bp, +999bp with -999bp). The first column of the resulting matrix thus contains the number of motif instances present at the peak center (distance of 0bp), the second column at an absolute distance of 1bp and so on. Finally, the rows were summed up and the resulting vector was considered as the motif density vector for the given peak list. The vectors were used to create a matrix for the CTCF motif and the JunD motif (a vector corresponds to a row), separately, and the matrix was displayed as a heatmap. The row values were standardized and the rows hierarchically clustered using the euclidean distance. \subsection{EBF1 and nucleosome} The correlation between EBF1 binding sites and nucleosome dyads was computed using ChIP-cor \citep{ambrosini_chip-seq_2016-1}, from the web (\url{https://ccg.epfl.ch/chipseq/chip_cor.php}). 
The references were the corrected EBF1 peaks (wgEncodeAwgTfbsHaibGm12878Ebf1sc137065Pcr1xUniPk dataset, for more details see section \ref{encode_peaks_methods_data}) and the targets either i) the MNase-seq data released by Gaffney et al. \citep{gaffney_controls_2012} (hg19 / DNase FAIRE etc / Gaffney 2012 ... / All Paired-end samples - 147bp fragments) or ii) the ENCODE MNase-seq data (hg19 / ENCODE DNase FAIRE etc / GSE35586 ... / GM12878 Nucleosome all (SLOW!)). In both cases, "any" strand was selected. Because Gaffney data are paired-ended and represent the fragment midpoint (the dyad), no centering was done. The ENCODE data are single-ended and a centering of 70bp (half a nucleosome) was applied to approximate the fragment midpoint. The count cut-off was set to 1 and the range to -399 to +400bp. To isolate nucleosomes with an EBF1 binding site, the opposite ChIP-cor analysis was run : Gaffney data as references versus EBF1 binding sites as targets with count cut-off set to 1 and the range to -399 to +400bp. In the results page the "Feature Selection Tool" was used to select dyads with at least 1 EBF1 binding site (threshold parameter) located "From" -99bp "To" 100bp. The count cut-off was set to 9999 and both "Switch to depleted feature" and "Reference feature oriented" set to "Off". These nucleosome dyads were uploaded to OProf (\url{https://ccg.epfl.ch/ssa/oprof.php}) on the SSA server \citep{ambrosini_signal_2003}. Four individual analyses were run to measure the "WW", "SS", "YRRRRRYYYYYR" and EBF1 motif occurrences. In all cases, the 5' and 3' borders were set to -399bp and 400bp, the window shift to 1bp and the search mode to "bidirectional". For "SS" and "WW", the motif to search was entered as a "Consensus sequence", the window size was set to 2bp, the reference position to 1 and the number of allowed mismatches to 0. 
For "YRRRRRYYYYYR", the motif was also entered as a "Consensus sequence", the window size was set to 12bp, the reference position to 6 and the number of allowed mismatches to 4. For the EBF1 motif, the JASPAR CORE Vertebrate 2018 "EBF1 MA0154.3 (length=14)" was used with a window size of 14bp, a reference position of 7 and a p-value threshold of 1e-4. To investigate the chromatin architecture around nucleosome dyads, ChIP-cor was used. Two references were used : i) the nucleosomes with an EBF1 binding site (see above) and ii) the entire Gaffney dataset (hg19 / DNase FAIRE etc / Gaffney 2012 ... / All Paired-end samples - 147bp fragments). For each reference, three analyses were run against different target features : i) DNase-seq data to monitor chromatin accessibility (hg19 / ENCODE DNase FAIRE etc / Boyle 2008 ... DNaseI HS - GM12878 - Rep 1) with "any" strand and no centering, ii) H3K4me2 ChIP-seq data (hg19 / ENCODE ChIP-seq / GSE29611 ... / GM12878 H3k4me2) with "any" strand and a centering of 70bp (half the nucleosome) and iii) positional sequence conservation scores (hg19 / Sequence derived / Vertebrate Conservation (phastCons46way) ... / PHASTCONS VERT46) with "any" strand and no centering. For DNase-seq and sequence conservation, the range was set to -399bp to 400bp with a window width of 1bp. For H3K4me2 data, the range was set to -3999bp to 4000bp with a window width of 10bp. For the DNase-seq and the H3K4me2 data, the count cut-offs were set to 1, for the sequence conservation to 9999. 
diff --git a/main/ch_spark.aux b/main/ch_spark.aux index 4ff6fd4..7f6fcda 100644 --- a/main/ch_spark.aux +++ b/main/ch_spark.aux @@ -1,98 +1,102 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{groux_spar-k:_2019} \citation{hon_chromasig:_2008} \citation{lai_archalign:_2010} -\citation{nielsen_catchprofiles} +\citation{nielsen_catchprofiles:_2012} \citation{kundaje_ubiquitous_2012} \citation{nair_probabilistic_2014} \@writefile{toc}{\contentsline {chapter}{\numberline {4}SPar-K}{59}{chapter.4}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{loa}{\addvspace {10\p@ }} \newlabel{spark}{{4}{59}{SPar-K}{chapter.4}{}} \@writefile{chapter}{\contentsline {toc}{SPar-K}{59}{chapter.4}} \@writefile{toc}{\contentsline {section}{\numberline {4.1}Algorithm}{59}{section.4.1}} \citation{arthur_k-means++:_2007} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} +\citation{groux_spar-k:_2019} +\citation{groux_spar-k:_2019} \@writefile{toc}{\contentsline {section}{\numberline {4.2}Implementation}{60}{section.4.2}} -\citation{leisch_toolbox_2006} -\citation{nair_probabilistic_2014} -\citation{nair_probabilistic_2014} -\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Synthethic datasets : \textbf {A} The class signal densities. \textbf {B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf {C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. 
\textbf {D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf {E} one of the corresponding SPar-K partition, with shifting and flipping.\relax }}{61}{figure.caption.23}} -\newlabel{spark_simulated_data}{{4.1}{61}{Synthethic datasets : \textbf {A} The class signal densities. \textbf {B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf {C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf {D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf {E} one of the corresponding SPar-K partition, with shifting and flipping.\relax }{figure.caption.23}{}} -\@writefile{toc}{\contentsline {section}{\numberline {4.3}Benchmarking}{61}{section.4.3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.3.1}K-means}{61}{subsection.4.3.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Synthethic datasets : \textbf {A} The class signal densities. \textbf {B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf {C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf {D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf {E} one of the corresponding SPar-K partition, with shifting and flipping. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{61}{figure.caption.23}} +\newlabel{spark_simulated_data}{{4.1}{61}{Synthethic datasets : \textbf {A} The class signal densities. 
\textbf {B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf {C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf {D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf {E} one of the corresponding SPar-K partition, with shifting and flipping. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.23}{}} \@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces \textbf {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. 
Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{62}{figure.caption.24}} \newlabel{spark_ari}{{4.2}{62}{\textbf {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.24}{}} \@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces \textbf {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf {A} Seeding done at random, \textbf {B} seeding done at random and outlier smoothing \textbf {C} seeding done with the K-means++ method \textbf {D} seeding done with the K-means++ method and outlier smoothing. 
In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{63}{figure.caption.25}} \newlabel{spark_sse}{{4.3}{63}{\textbf {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf {A} Seeding done at random, \textbf {B} seeding done at random and outlier smoothing \textbf {C} seeding done with the K-means++ method \textbf {D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.25}{}} \@writefile{lof}{\contentsline {figure}{\numberline {4.4}{\ignorespaces \textbf {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. 
Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{63}{figure.caption.26}} \newlabel{spark_time}{{4.4}{63}{\textbf {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.26}{}} -\citation{groux_spar-k:_2019} +\citation{leisch_toolbox_2006} +\citation{nair_probabilistic_2014} +\citation{nair_probabilistic_2014} +\@writefile{toc}{\contentsline {section}{\numberline {4.3}Benchmarking}{64}{section.4.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.3.1}K-means}{64}{subsection.4.3.1}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.3.2}ChIPPartitioning}{64}{subsection.4.3.2}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.3.3}Data}{64}{subsection.4.3.3}} -\citation{ambrosini_chip-seq_2016} +\citation{groux_spar-k:_2019} +\citation{groux_spar-k:_2019} +\citation{groux_spar-k:_2019} \citation{ambrosini_chip-seq_2016} \citation{groux_spar-k:_2019} +\citation{ambrosini_chip-seq_2016} \citation{groux_spar-k:_2019} \citation{bailey_meme_2009} -\citation{kundaje_ubiquitous_2012} \@writefile{toc}{\contentsline {subsection}{\numberline {4.3.4}Performances}{65}{subsection.4.3.4}} \@writefile{toc}{\contentsline {section}{\numberline {4.4}Partition of DNase and MNase data}{65}{section.4.4}} 
-\@writefile{toc}{\contentsline {section}{\numberline {4.5}Conclusions}{65}{section.4.5}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf {A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf {B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf {C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf {D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf {E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf {F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep {ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf {G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. 
\textbf {H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster.\relax }}{66}{figure.caption.27}} -\newlabel{spark_ctcf}{{4.5}{66}{Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf {A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf {B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf {C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf {D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf {E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf {F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep {ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf {G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. 
\textbf {H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster.\relax }{figure.caption.27}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{67}{figure.caption.28}} -\newlabel{spark_dnase}{{4.6}{67}{Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. 
\textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.28}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.5}{\ignorespaces Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. 
Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{66}{figure.caption.27}} +\newlabel{spark_dnase}{{4.5}{66}{Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.27}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.6}{\ignorespaces Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf {A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf {B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. 
Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf {C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf {D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf {E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf {F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep {ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf {G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. \textbf {H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{67}{figure.caption.28}} +\newlabel{spark_ctcf}{{4.6}{67}{Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf {A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf {B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. 
The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf {C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf {D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf {E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf {F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep {ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf {G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. \textbf {H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster. 
Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.28}{}} +\citation{kundaje_ubiquitous_2012} +\@writefile{toc}{\contentsline {section}{\numberline {4.5}Conclusions}{68}{section.4.5}} \@setckpt{main/ch_spark}{ -\setcounter{page}{68} +\setcounter{page}{69} \setcounter{equation}{1} \setcounter{enumi}{8} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{4} \setcounter{section}{5} \setcounter{subsection}{0} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{6} \setcounter{table}{0} \setcounter{NAT@ctr}{0} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{0} \setcounter{lstnumber}{1} \setcounter{Item}{8} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{28} \setcounter{algocfline}{1} \setcounter{algocfproc}{1} \setcounter{algocf}{1} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/main/ch_spark.tex b/main/ch_spark.tex index 23fa6b1..eb640ea 100644 --- a/main/ch_spark.tex +++ b/main/ch_spark.tex @@ -1,119 +1,121 @@ \cleardoublepage \chapter{SPar-K} \label{spark} \markboth{SPar-K}{SPar-K} \addcontentsline{chapter}{toc}{SPar-K} -This chapter describes SPar-K (Signal Partitioning with K-means), a modification of the K-means algorithm to cluster genomic regions based on their chromatin organization, defined by by their sequencing profiles. 
+This chapter describes SPar-K (Signal Partitioning with K-means), a modification of the K-means algorithm to cluster genomic regions based on their chromatin organization, defined by their sequencing profiles. I developed, implemented and benchmarked this algorithm and produced all the figures that are shown in this chapter. The content of this section is taken and adapted from the original article \citep{groux_spar-k:_2019}. % Due to the wealth of sequencing data, it is common to analyze positional correlations between chromatin features, e.g. the position of nucleosomes (revealed by MNase-seq) relative to transcription factor binding regions (mapped by ChIP-seq) in order to shed light on their functional relationship. However, as noted in \citep{kundaje_ubiquitous_2012}, chromatin patterns tend to be heterogeneous and often asymmetric. Furthermore, limited mapping precision leading to moderate misalignment between functionally equivalent regions can also obscure a chromatin pattern. -Due to the wealth of sequencing data, it is common to analyze positional correlations between chromatin features, e.g. the position of nucleosomes (revealed by MNase-seq) relative to transcription factor binding regions (mapped by ChIP-seq) in order to shed light on their functional relationship. Several methods and software have been developed for discovering chromatin pattern by clustering and/or realignment of signal profiles for genomic regions (see section \ref{intro_pattern_discovery}), including ChromaSig \citep{hon_chromasig:_2008}, ArchAlign \citep{lai_archalign:_2010}, CATCHProfiles \citep{nielsen_catchprofiles}, CAGT \citep{kundaje_ubiquitous_2012} and ChIPPartitioning\citep{nair_probabilistic_2014}. However, all of these programs have some limitations. Some do not realign, others are restricted to count data or lack an runtime efficient implementation, such as ChIPParititioning. To fill this gap, I developed SPar-K (Signal Partitioning with K-means).
+Due to the wealth of sequencing data, it is common to analyze positional correlations between chromatin features, e.g. the position of nucleosomes (revealed by MNase-seq) relative to transcription factor binding regions (mapped by ChIP-seq) in order to shed light on their functional relationship. Several methods and software have been developed for discovering chromatin patterns by clustering and/or realigning read density profiles over genomic regions (see section \ref{intro_pattern_discovery}), including ChromaSig \citep{hon_chromasig:_2008}, ArchAlign \citep{lai_archalign:_2010}, CATCHProfiles \citep{nielsen_catchprofiles:_2012}, CAGT \citep{kundaje_ubiquitous_2012} and ChIPPartitioning \citep{nair_probabilistic_2014}. However, these programs have some limitations. Some do not perform a realignment, others are restricted to count data or lack a runtime-efficient implementation, such as ChIPPartitioning. To fill this gap, I developed SPar-K (Signal Partitioning with K-means). \section{Algorithm} % summary -SPar-K algorithm (Algorithm \ref{algo_spark}) is a modified version of the regular K-means algorithm during which a set of $N$ regions of size $L$ are partitioned into $K$ clusters, using an iterative optimization procedure. Each cluster is composed of an alignment of regions sub-parts of length $W$ assigned to this cluster and the cluster is summarized a vector of length $L \geq W$ that contains the average signal at each position in the alignment. +SPar-K algorithm (see Algorithm \ref{algo_spark}) is a modified version of the regular K-means algorithm during which a set of $N$ regions of size $L$ are partitioned into $K$ clusters, using an iterative optimization procedure. Each cluster is composed of an alignment of regions sub-parts of length $L'$ assigned to this cluster and the cluster is summarized by a vector of length $L \geq L'$ that contains the average signal at each position in the alignment.
% input -The input data are stored as a $N$ rows and $L$ columns matrix $R$. The signal resolution may be at single-base or at a larger bin size. The regions are typically defined by relative positions to an anchor point, e.g. a ChIP-seq peak summit. If the signal is noisy, a data smoothing step can be undertaken to average out outlier values (Algorithm \ref{algo_smooth_outliers}) and ease the partitioning procedure. +The input data are stored as a $N$ rows and $L$ columns matrix $R$. The signal resolution may be at single-base or at a larger bin size. The regions are typically defined by relative positions to an anchor point, e.g. a ChIP-seq peak summit. If the signal is noisy, a data smoothing step can be performed to average out outlier values (see Algorithm \ref{algo_smooth_outliers}) and ease the partitioning procedure. % objective function -SPar-K optimizes the alignments by minimizing the sum of squares errors. That is, the sum of the squares distances of each point to the cluster aggregation they are assigned to. +SPar-K optimizes the alignments by minimizing the sum of squared errors, that is, the sum of the squared distances of each point to the cluster aggregation it is assigned to. % distances -The distance between any two regions is computed a modified correlation distance. Let us assume two regions $X$ and $Y$ of length $L$ and a shifting freedom $S$. $X$ and $Y$ will be sub-divided in $S$ slices each. Each slice has a length of $W$=$L-S-1$ and starts at all possible offsets $s=1,2,...,S$. All $S^{2}$ pairwise comparisons between any slices of $X$ and $Y$ are computed using $1-cor(X_{i},Y_{j})$ where $X_{i}$ and $Y_{j}$ are the slices starting at offsets $i,j \in s$. If flipping is allowed, another set of $S^{2}$ comparisons is performed by flipping $Y_{j}$ (that is, the 1st position in $Y_{j}$ becomes the last and vice-versa), resulting in $2 \times S^{2}$ comparisons.
Eventually, the distance between $X$ and $Y$ is the minimum of the $S^{2}$ (without flipping) or $2 \times S^{2}$ (with flipping) values. For each distance, the indices $i$ and $j$ and whether $Y_{j}$ was flipped in the best comparison are remembered as they allow to rebuilt the optimal alignment between $X$ and $Y$. The naive algorithm to do this is $\Theta(S^{2} \times W)$ in time however I could design a faster algorithm which is $\Theta(S \times W)$ by using a dynamic programming approach (see algorithm \ref{algo_distance_fast}). +The distance between any two regions is computed using a modified correlation distance. Let us assume two regions $X$ and $Y$ of length $L$ and a shifting freedom $S$. $X$ and $Y$ will be sub-divided into $S$ slices each. Each slice has a length of $L'=L-S+1$ and the slices start at all possible offsets $s=1,2,...,S$. All $S^{2}$ pairwise comparisons between any slices of $X$ and $Y$ are computed using $1-cor(X_{i},Y_{j})$ where $X_{i}$ and $Y_{j}$ are the slices starting at offsets $i,j \in \{1,2,...,S\}$. If flipping is allowed, another set of $S^{2}$ comparisons is performed by flipping $Y_{j}$ (that is, the 1st position in $Y_{j}$ becomes the last and vice-versa), resulting in $2 \times S^{2}$ comparisons. Eventually, the distance between $X$ and $Y$ is the minimum of the $S^{2}$ (without flipping) or $2 \times S^{2}$ (with flipping) values. For each distance, the indices $i$ and $j$ and whether $Y_{j}$ was flipped in the best comparison are remembered as they allow rebuilding the optimal alignment between $X$ and $Y$. The naive algorithm to do this is $\Theta(S^{2} \times L')$ in time; however, I designed a faster algorithm which is $\Theta(S \times L')$ by using a dynamic programming approach (see algorithm \ref{algo_distance_fast}).
% iteration walk-through -SPar-K is initialized by choosing $K$ regions to become the initial cluster aggregations of length $L$ either i) randomly (Algorithm \ref{algo_seed_random}) or ii) using the K-means++ sampling procedure (Algorithm \ref{algo_seed_kmeans++}, \cite{arthur_k-means++:_2007}). Then, each regions is aligned against each cluster aggregation an assigned to the cluster to which it has the smallest distance with. Once all $N$ regions have been aligned to a cluster, the cluster aggregations are updated by computed the average signal at each position in the alignments. +SPar-K is initialized by choosing $K$ regions to become the initial cluster aggregations of length $L$ either i) randomly (Algorithm \ref{algo_seed_random}) or ii) using the K-means++ \citep{arthur_k-means++:_2007} sampling procedure (see Algorithm \ref{algo_seed_kmeans++}). Then, each region is aligned against each cluster aggregation and assigned to the cluster to which it has the smallest distance. Once all $N$ regions have been aligned to a cluster, the cluster aggregations are updated by computing the average signal at each position in the alignments. -This procedure and is then repeated until i) reaching the maximum number of iterations or ii) achieving convergence. +This procedure is repeated until i) reaching the maximum number of iterations or ii) achieving convergence, that is, when the alignments in each cluster do not change from one iteration to the next. \section{Implementation} SPar-K algorithm has been implemented as a stand-alone, fully multithreaded, C++ program. Regarding the parallellization, the computations at each step are independent of each other, leading to an ”embarrassingly parallel” situation. Thus, at each step, the computations are split into equal amounts and distributed over a pool of worker threads. Eventually, the program returns a table listing for each region the cluster assignment, the shift state and the orientation.
The software distribution also includes R scripts for visualizing the data as heatmaps as shown in Figure \ref{spark_dnase}. The software source code is available from Github \url{https://github.com/romaingroux/SPar-K} and as Docker container \url{https://hub.docker.com/r/rgroux/spar-k}. \section{Benchmarking} \begin{figure} - \centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure1.pdf}} - \caption{Synthethic datasets : \textbf{A} The class signal densities. \textbf{B} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf{C} one of the corresponding SPar-K partition, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf{D} A synthetic dataset with a mean coverage of a 100 reads per region in average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf{E} one of the corresponding SPar-K partition, with shifting and flipping.}. + \centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure1.png}} + \caption{Synthetic datasets : \textbf{A} The class signal densities. \textbf{B} A synthetic dataset with a mean coverage of 100 reads per region on average ($c$=100) and 0\% noise ($p_{s}$=1, $p_{b}$=0) and \textbf{C} one of the corresponding SPar-K partitions, with shifting and flipping. The color ribbons on the side indicate the cluster assignments. \textbf{D} A synthetic dataset with a mean coverage of 100 reads per region on average ($c$=100) and 90\% noise ($p_{s}$=0.1, $p_{b}$=0.9) and \textbf{E} one of the corresponding SPar-K partitions, with shifting and flipping. +Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.}
\label{spark_simulated_data} \end{figure} % supplemental figure 2 from article \begin{figure} - \centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure2.pdf}} + \centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure2.png}} \caption{\textbf{Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.} \label{spark_ari} \end{figure} % supplemental figure 4 from article \begin{figure} -\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure4.pdf}} +\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure4.png}} \caption{\textbf{Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. 
For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf{A} Seeding done at random, \textbf{B} seeding done at random and outlier smoothing \textbf{C} seeding done with the K-means++ method \textbf{D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.} \label{spark_sse} \end{figure} % supplemental figure 5 from article \begin{figure} -\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure5.pdf}} +\centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure5.png}} \caption{\textbf{Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.} \label{spark_time} \end{figure} -First I compared SPar-K, regular K-means and ChIP-partitioning on synthetic datasets exhibiting properties that are plausible for ChIP-seq profiles for genomic regions. +First I compared SPar-K, regular K-means and ChIPPartitioning on synthetic datasets exhibiting properties that are plausible for ChIP-seq profiles for genomic regions. 
\subsection{K-means} For the regular K-means, the "kccaFamily" function from the "flexclust" R package \citep{leisch_toolbox_2006} was used. Calls to kccaFamily(dist=distEuclidean, cent=centMean) or kccaFamily(dist=distCor, cent=centMean) were employed to partition the data using the euclidean distance and the correlation distance respectively. "distEuclidean" is a package defined function and "distCor", a custom function computing $1 - cor(x,y)$ for any two $x$ and $y$ vectors. If the correlation between $x$ and $y$ could not be computed (for instance the standard deviation of $x$ or $y$ is equal to 0), the correlation was assumed to be 0 (and the distance 1). The initial centers were chosen using one of the two following seeding strategies : i) a random sampling of $K$ points or ii) K-means++, a strategy aiming at sampling $K$ initial points as far as possible from each other. \subsection{ChIPPartitioning} The implementation was done in R programming language. The "em\_shape", "em\_shape\_shift" and "em\_shape\_shift\_flip" functions present in the supplemental material of \citep{nair_probabilistic_2014} were taken as such and incorporated in a R wrapper (as in Chapter \ref{encode_peaks}). For this method, the partitioning could only be initialized using a random procedure, as described in \citep{nair_probabilistic_2014}. \subsection{Data} I generated several synthetic datasets. Each dataset contained 1000 regions of 2001bp (+/- 1kb around a central position), equally distributed over 3 classes. The signal over a region was modeled as a mixture of class specific signal and of background signal. The class specific signal was modeled by a 1902 element density vector. The background signal was modeled using a second 1902 element density vector containing a uniform density. The first class density vector contained a Gaussian density with mean 951 and standard deviation 40 (Figure \ref{spark_simulated_data}A upper panel). 
The second class density was a Gaussian density of mean 950 and standard deviation 40. To create an asymmetric signal class, the values at positions 950 to 1902 (comprised) were set to the minimal value found in the original density (Figure \ref{spark_simulated_data}A middle panel). The last class contained a rectangular function with a step corresponding to the elements 830 to 1070 (Figure \ref{spark_simulated_data}A lower panel). Finally, all the densities were normalized such that the sum of each vector was 1. From these densities, the $\lambda$ values for a class $k$ were computed using the following formula : \begin{equation} lambdas_{k} = signal_{k} * c * p_{s} + background * c * p_{b} \end{equation} where $signal_{k}$ is the class characteristic signal density, $background$ a uniform density, $c$ the coverage factor, $p_{s}$ the overall signal proportion and $p_{b}$ the overall background proportion, with the constraint $p_{s} + p_{b} = 1$. % generations For each region, a read signal of 1902bp long was randomly sampled from Poisson distributions with the $lambdas$ values as function parameters. Then, the signal vector was introduced, in a 2001 element long vector filled of 0's, at a given offset, in a given orientation. The offset was randomly sampled from 1 to 100 . The orientation was randomly sampled with a probability of 0.3 to be in the reversed orientation. Finally, the resulting 2001bp vectors were binned using a 10bp window, that is, the signal was summed up every 10 columns leading to the creation of 201 bin long vectors. At the end of the process, a dataset was stored as a matrix of 1000 rows and 201 columns. 
Two examples of synthetic datasets are shown in Figure \ref{spark_simulated_data}B and D.\\ \subsection{Performances} -Performance was assessed by the Adjusted Rand index (see Figure \ref{spark_ari} and Supplemental Figure 3 in \citep{groux_spar-k:_2019}) and the optimal number of classes was estimated by the elbow method (Figure \ref{spark_sse}). As expected, regular K-means performed poorly. On the contrary, SPar-K was equally accurate as ChIP-Partitioning except for the lowest coverage class. Considering speed, Spar-K outperformed ChIP-partitioning by a factor of at least 20 (Figure \ref{spark_time}). +Performances were assessed using the Adjusted Rand index (see Figure \ref{spark_ari} and Supplemental Figure 3 in \citep{groux_spar-k:_2019}) and the optimal number of classes was estimated using the elbow method (Figure \ref{spark_sse}). As expected, regular K-means performed poorly. On the contrary, SPar-K was equally accurate as ChIPPartitioning except for the lowest coverage class. Considering speed, Spar-K outperformed ChIPPartitioning by a factor of at least 20 (Figure \ref{spark_time}). \section{Partition of DNase and MNase data} -% supplemental figure 8 from article -\begin{figure} - \centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure8.pdf}} - \caption{Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf{A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf{B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. 
Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf{C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf{D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf{E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf{F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep{ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf{G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. \textbf{H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster.} - \label{spark_ctcf} -\end{figure} - % figure 1 from article \begin{figure} -\centerline{\includegraphics[scale=0.4]{images/ch_spark/figure1.pdf}} +\centerline{\includegraphics[scale=0.4]{images/ch_spark/figure1.png}} \caption{Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method. \textbf{A.} Input data based on peak summits provided by ENCODE. \textbf{B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. 
\textbf{C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf{D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf{E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf{F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf{G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf{B}. Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.} \label{spark_dnase} \end{figure} -I applied SPar-K with $K=3$ to DNaseI accessibility profiles (2bp resolution) around 7'206 ChIP-seq SP1-binding peaks (+/-300bp relative to peak summit) in K562 cells (Figure \ref{spark_dnase}A). The results revealed the presence of clear footprints in all the clusters (Figure \ref{spark_dnase}B). To validate these footprints, I checked whether they are consistent with to location of nucleosomes (Figure \ref{spark_dnase}C) and SP1 binding motifs (Figure \ref{spark_dnase}D), which was indeed the case. De novo motif analysis of the narrow footprints seen in Figure \ref{spark_dnase}B with MEME-ChIP and Tomtom \citep{bailey_meme_2009} discovered SP1-related, NFYA/B and GATA motifs (Figure \ref{spark_dnase}G) the latter two reportedly being interaction partners of SP1. Taken together, these results suggest that SPar-K is able to precisely refocus initially misaligned DNaseI profiles around SP1 binding sites. 
+% supplemental figure 8 from article +\begin{figure} + \centerline{\includegraphics[scale=0.4]{images/ch_spark/supplemental_figure8.png}} + \caption{Nucleosome occupancy, determined by MNase-seq, in bins of 10bp, +/- 1000bp around 79'957 CTCF binding sites in GM12878 cells. \textbf{A} MNaseI-seq read density around the CTCF binding sites. ChIP-seq peak summits are aligned at position 0. The regions (rows) are ordered according the their resemblance (correlation) to the overall aggregation pattern. \textbf{B} SPar-K data partition. The number of clusters (4) was determined using the elbow method. The cluster labels are indicated by the color ribbons on the left. Within each cluster, the data have been realigned according to the shift and flip informations returned by SPar-K and the regions have been ordered according the their resemblance (correlation) to the cluster aggregation pattern. Because of the realignment, ChIP-seq peak summits are not anymore aligned at position 0. \textbf{C} Corresponding DNaseI hypersensitivity measured by DNaseI-seq at the same loci and realigned as in B. \textbf{D} CTCF motif occurrences predicted using a motif scan, at the same loci and realigned as in B. Each predicted binding site, +/- 1kb around a peak, is represented as a point. \textbf{E} Transcription start site (TSS) density at the same loci and realigned as in B. \textbf{F} Cluster 1 (red) aggregation profiles. The original peak coordinates were modified accordingly to the shift and flip values returned by SPar-K and the read densities the different data types were measured using ChIP-Cor \citep{ambrosini_chip-seq_2016}. For the TSSs and the transcription initiation (CAGE), only the data mapping on the negative strand were used to monitor transcription firing towards the nucleosome array (towards the left). \textbf{G} Proportions of regions having at least one CTCF motif +/- 1kb (same motifs as in D), for each cluster. 
\textbf{H} Proportions of regions having at least one TSS +/- 1kb (same TSSs as in E), for each cluster. +Figure and legend taken and adapted from \citep{groux_spar-k:_2019}.} + \label{spark_ctcf} +\end{figure} + +I applied SPar-K with $K=3$ to DNaseI accessibility profiles (2bp resolution) around 7'206 ChIP-seq SP1-binding peaks (+/-300bp relative to peak summit) in K562 cells (Figure \ref{spark_dnase}A). The results revealed the presence of clear footprints in all the clusters (Figure \ref{spark_dnase}B). To validate these footprints, I checked whether they were consistent with the location of nucleosomes (Figure \ref{spark_dnase}C) and SP1 binding motifs (Figure \ref{spark_dnase}D), which was indeed the case. A de novo motif analysis of the narrow footprints seen in Figure \ref{spark_dnase}B with MEME-ChIP and Tomtom \citep{bailey_meme_2009} revealed SP1-related, NFYA/B and GATA motifs (Figure \ref{spark_dnase}G) the latter two reportedly being interaction partners of SP1. Taken together, these results suggest that SPar-K is able to precisely refocus initially misaligned DNaseI profiles around SP1 binding sites. -The partitioning of SP1 binding regions reveals distinct chromatin landscapes. Cluster 1 (red) groups binding sites lying between two closely spaced nucleosomes. Cluster 2 (blue) shows strong asymmetry suggestive of promoter regions, an interpretation supported by the presence of promoter-associated transcription starts sites (TSSs) and CAGE tags (Figure \ref{spark_dnase}E and F). Finally, the symmetrical cluster 3 (green) contains binding sites located on large nucleosome-free regions reminiscent of enhancer regions. +The partitioning of SP1 binding regions revealed distinct chromatin landscapes. Cluster 1 (red) groups binding sites lying between two closely spaced nucleosomes. 
Cluster 2 (blue) showed a strong asymmetry suggestive of promoter regions, an interpretation supported by the presence of TSSs indicative of promoters and of CAGE tags (Figure \ref{spark_dnase}E and F). Finally, the symmetrical cluster 3 (green) contained binding sites located on large nucleosome-free regions reminiscent of enhancer regions. As a second example, I ran the same type of analysis on nucleosome profiles around CTCF binding sites (Figure \ref{spark_ctcf}). Overall, the results confirm observations from Chapter \ref{encode_peaks} and published in \citep{kundaje_ubiquitous_2012}. Strong nucleosome arrays became visible in all classes after realignment, with three out of four showing strong asymmetry in addition. \section{Conclusions} -SPar-K is a useful partitioning method for moderately misaligned and randomly oriented chromatin regions. Compared to existing methods, it is competitive in terms of accuracy, superior in speed, applicable to a wider range of input signals (not restricted to count data) and easier to use. +SPar-K is a useful partitioning method for moderately misaligned and randomly oriented chromatin regions. Compared to existing methods, it is competitive in terms of accuracy, superior in speed, applicable to a wider range of input signals (not restricted to count data) and easy to use.
diff --git a/my_thesis.aux b/my_thesis.aux index c06422c..410dd7a 100644 --- a/my_thesis.aux +++ b/my_thesis.aux @@ -1,194 +1,194 @@ \relax \providecommand\hyper@newdestlabel[2]{} \providecommand\BKM@entry[2]{} \catcode `:\active \catcode `;\active \catcode `!\active \catcode `?\active \catcode `"\active \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined \global\let\oldcontentsline\contentsline \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} \global\let\oldnewlabel\newlabel \gdef\newlabel#1#2{\newlabelxx{#1}#2} \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} \AtEndDocument{\ifx\hyper@anchor\@undefined \let\contentsline\oldcontentsline \let\newlabel\oldnewlabel \fi} \fi} \global\let\hyper@last\relax \gdef\HyperFirstAtBeginDocument#1{#1} \providecommand\HyField@AuxAddToFields[1]{} \providecommand\HyField@AuxAddToCoFields[2]{} \providecommand \oddpage@label [2]{} \babel@aux{english}{} \babel@aux{french}{} \babel@aux{english}{} \@input{head/dedication.aux} \@input{head/acknowledgements.aux} \BKM@entry{id=1,dest={636861707465722A2E31},srcline={3}}{41636B6E6F776C656467656D656E7473} \pgfsyspdfmark {pgfid2}{0}{40463552} \pgfsyspdfmark {pgfid1}{6}{40498788} \@input{head/abstracts.aux} \BKM@entry{id=2,dest={636861707465722A2E32},srcline={9}}{4162737472616374205C28456E676C6973682F4672616E5C3334376169732F446575747363685C29} \pgfsyspdfmark {pgfid4}{0}{40463552} \pgfsyspdfmark {pgfid3}{6}{40498788} \pgfsyspdfmark {pgfid6}{0}{40463552} \pgfsyspdfmark {pgfid5}{6}{40498788} \BKM@entry{id=3,dest={746F632E30},srcline={30}}{436F6E74656E7473} \pgfsyspdfmark {pgfid8}{0}{40463552} \pgfsyspdfmark {pgfid7}{6}{40498788} \@input{main/ch_introduction.aux} \BKM@entry{id=4,dest={636861707465722E31},srcline={2}}{496E74726F64756374696F6E} \BKM@entry{id=5,dest={636861707465722E31},srcline={5}}{496E74726F64756374696F6E} \BKM@entry{id=6,dest={73656374696F6E2E312E31},srcline={13}}{41626F7574206368726F6D6174696E} 
\BKM@entry{id=7,dest={73756273656374696F6E2E312E312E31},srcline={18}}{546865206368726F6D6174696E20737472756374757265} \pgfsyspdfmark {pgfid10}{0}{40463552} \pgfsyspdfmark {pgfid9}{6}{40511883} \BKM@entry{id=8,dest={73756273656374696F6E2E312E312E32},srcline={42}}{546865206368726F6D6174696E2069732064796E616D6963} \BKM@entry{id=9,dest={73756273656374696F6E2E312E312E33},srcline={52}}{41626F7574206E75636C656F736F6D6520706F736974696F6E696E67} \BKM@entry{id=10,dest={73656374696F6E2E312E32},srcline={79}}{41626F7574207472616E736372697074696F6E20666163746F7273} \BKM@entry{id=11,dest={73756273656374696F6E2E312E322E31},srcline={95}}{544620636F2D62696E64696E67} \BKM@entry{id=12,dest={73656374696F6E2E312E33},srcline={126}}{47656E6520726567756C6174696F6E20696E2061206E75747368656C6C} \BKM@entry{id=13,dest={73756273656374696F6E2E312E332E31},srcline={139}}{546865206368726F6D6174696E2062617272696572} \BKM@entry{id=14,dest={73756273656374696F6E2E312E332E32},srcline={144}}{54467320636F6F70657261746976652062696E64696E67} \BKM@entry{id=15,dest={73756273656374696F6E2E312E332E33},srcline={151}}{50696F6E65657220544673} \BKM@entry{id=16,dest={73756273656374696F6E2E312E332E34},srcline={162}}{526567756C61746F727920656C656D656E7473} \BKM@entry{id=17,dest={73756273656374696F6E2E312E332E35},srcline={175}}{5468652067656E6F6D6520676F6573203344} \BKM@entry{id=18,dest={73656374696F6E2E312E34},srcline={186}}{4D6561737572696E67206368726F6D6174696E206665617475726573} \BKM@entry{id=19,dest={73756273656374696F6E2E312E342E31},srcline={191}}{4D6561737572696E672054462062696E64696E6720696E207669766F} \BKM@entry{id=20,dest={73756273656374696F6E2E312E342E32},srcline={203}}{4D6561737572696E672054462062696E64696E6720696E20766974726F} \BKM@entry{id=21,dest={73756273656374696F6E2E312E342E33},srcline={216}}{4D6561737572696E67206E75636C656F736F6D65206F63637570616E6379} \BKM@entry{id=22,dest={73756273656374696F6E2E312E342E34},srcline={232}}{4469676974616C20666F6F747072696E74696E67} 
\BKM@entry{id=23,dest={73656374696F6E2E312E35},srcline={264}}{4D6F64656C696E672073657175656E6365207370656369666963697479} \BKM@entry{id=24,dest={73756273656374696F6E2E312E352E31},srcline={329}}{416C69676E696E672062696E64696E67207369746573} \BKM@entry{id=25,dest={73756273656374696F6E2E312E352E32},srcline={337}}{506C6174697475646573} \BKM@entry{id=26,dest={73756273656374696F6E2E312E352E33},srcline={356}}{50726564696374696E672062696E64696E67207369746573} \BKM@entry{id=27,dest={73656374696F6E2E312E36},srcline={389}}{4F7665722D726570726573656E746564207061747465726E7320646973636F76657279} \@input{main/ch_lab_resources.aux} \BKM@entry{id=28,dest={636861707465722E32},srcline={2}}{4C61626F7261746F7279207265736F7572636573} \BKM@entry{id=29,dest={636861707465722E32},srcline={5}}{4C61626F7261746F7279207265736F7572636573} \BKM@entry{id=30,dest={73656374696F6E2E322E31},srcline={14}}{4D6173732047656E6F6D6520416E6E6F746174696F6E207265706F7369746F7279} \pgfsyspdfmark {pgfid13}{0}{40463552} \pgfsyspdfmark {pgfid12}{6}{40511883} \BKM@entry{id=31,dest={73756273656374696F6E2E322E312E31},srcline={21}}{4D474120636F6E74656E7420616E64206F7267616E697A6174696F6E} \BKM@entry{id=32,dest={73756273656374696F6E2E322E312E32},srcline={51}}{436F6E636C7573696F6E73} \BKM@entry{id=33,dest={73656374696F6E2E322E32},srcline={57}}{45756B6172796F7469632050726F6D6F746572204461746162617365} \BKM@entry{id=34,dest={73756273656374696F6E2E322E322E31},srcline={75}}{4550446E6577206E6F7720616E6E6F7461746573205C28736F6D65206F665C2920796F7572206D757368726F6F6D7320616E6420766567657461626C6573} \BKM@entry{id=35,dest={73756273656374696F6E2E322E322E32},srcline={109}}{496E63726561736564206D617070696E6720707265636973696F6E20696E2068756D616E} \BKM@entry{id=36,dest={73756273656374696F6E2E322E322E33},srcline={121}}{496E746567726174696F6E206F66204550446E65772077697468206F74686572207265736F7572636573} \BKM@entry{id=37,dest={73756273656374696F6E2E322E322E34},srcline={127}}{436F6E636C7573696F6E73} 
\BKM@entry{id=38,dest={73756273656374696F6E2E322E322E35},srcline={133}}{4D6574686F6473} \@input{main/ch_encode_peaks.aux} \BKM@entry{id=39,dest={636861707465722E33},srcline={2}}{454E434F4445207065616B7320616E616C79736973} \BKM@entry{id=40,dest={636861707465722E33},srcline={5}}{454E434F4445207065616B7320616E616C79736973} \BKM@entry{id=41,dest={73656374696F6E2E332E31},srcline={19}}{44617461} \pgfsyspdfmark {pgfid15}{0}{40463552} \pgfsyspdfmark {pgfid14}{6}{40511883} \BKM@entry{id=42,dest={73656374696F6E2E332E32},srcline={45}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206368726F6D6174696E2061726368697465637475726573} \BKM@entry{id=43,dest={73756273656374696F6E2E332E322E31},srcline={86}}{44617461207265616C69676E6D656E74} \BKM@entry{id=44,dest={73656374696F6E2E332E33},srcline={98}}{4E75636C656F736F6D65206F7267616E697A6174696F6E2061726F756E64207472616E736372697074696F6E20666163746F722062696E64696E67207369746573} \BKM@entry{id=45,dest={73656374696F6E2E332E34},srcline={126}}{5468652063617365206F6620435443462C2052414432312C20534D43332C2059593120616E64205A4E46313433} \BKM@entry{id=46,dest={73656374696F6E2E332E35},srcline={165}}{4354434620616E64204A756E4420696E7465726163746F6D6573} \BKM@entry{id=47,dest={73656374696F6E2E332E36},srcline={258}}{454246312062696E6473206E75636C656F736F6D6573} \BKM@entry{id=48,dest={73656374696F6E2E332E37},srcline={295}}{44697363757373696F6E} \BKM@entry{id=49,dest={73656374696F6E2E332E38},srcline={311}}{4D6574686F6473} \BKM@entry{id=50,dest={73756273656374696F6E2E332E382E31},srcline={313}}{4461746120616E6420646174612070726F63657373696E67} \BKM@entry{id=51,dest={73756273656374696F6E2E332E382E32},srcline={326}}{436C617373696669636174696F6E206F66204D4E617365207061747465726E73} \BKM@entry{id=52,dest={73756273656374696F6E2E332E382E33},srcline={339}}{5175616E74696679696E67206E75636C656F736F6D6520617272617920696E74656E736974792066726F6D20636C617373696669636174696F6E20726573756C7473} 
\BKM@entry{id=53,dest={73756273656374696F6E2E332E382E34},srcline={368}}{5065616B20636F6C6F63616C697A6174696F6E} \BKM@entry{id=54,dest={73756273656374696F6E2E332E382E35},srcline={372}}{4E445220646574656374696F6E} \BKM@entry{id=55,dest={73756273656374696F6E2E332E382E36},srcline={464}}{4354434620616E64204A756E4420696E7465726163746F7273} \BKM@entry{id=56,dest={73756273656374696F6E2E332E382E37},srcline={476}}{4542463120616E64206E75636C656F736F6D65} \@input{main/ch_spark.aux} \BKM@entry{id=57,dest={636861707465722E34},srcline={2}}{535061722D4B} \BKM@entry{id=58,dest={73656374696F6E2E342E31},srcline={15}}{416C676F726974686D} \pgfsyspdfmark {pgfid17}{0}{40463552} \pgfsyspdfmark {pgfid16}{6}{40511883} \BKM@entry{id=59,dest={73656374696F6E2E342E32},srcline={35}}{496D706C656D656E746174696F6E} \BKM@entry{id=60,dest={73656374696F6E2E342E33},srcline={39}}{42656E63686D61726B696E67} -\BKM@entry{id=61,dest={73756273656374696F6E2E342E332E31},srcline={73}}{4B2D6D65616E73} -\BKM@entry{id=62,dest={73756273656374696F6E2E342E332E32},srcline={76}}{43684950506172746974696F6E696E67} -\BKM@entry{id=63,dest={73756273656374696F6E2E342E332E33},srcline={79}}{44617461} -\BKM@entry{id=64,dest={73756273656374696F6E2E342E332E34},srcline={91}}{506572666F726D616E636573} -\BKM@entry{id=65,dest={73656374696F6E2E342E34},srcline={94}}{506172746974696F6E206F6620444E61736520616E64204D4E6173652064617461} -\BKM@entry{id=66,dest={73656374696F6E2E342E35},srcline={117}}{436F6E636C7573696F6E73} +\BKM@entry{id=61,dest={73756273656374696F6E2E342E332E31},srcline={74}}{4B2D6D65616E73} +\BKM@entry{id=62,dest={73756273656374696F6E2E342E332E32},srcline={77}}{43684950506172746974696F6E696E67} +\BKM@entry{id=63,dest={73756273656374696F6E2E342E332E33},srcline={80}}{44617461} +\BKM@entry{id=64,dest={73756273656374696F6E2E342E332E34},srcline={92}}{506572666F726D616E636573} +\BKM@entry{id=65,dest={73656374696F6E2E342E34},srcline={95}}{506172746974696F6E206F6620444E61736520616E64204D4E6173652064617461} 
+\BKM@entry{id=66,dest={73656374696F6E2E342E35},srcline={119}}{436F6E636C7573696F6E73} \@input{main/ch_smile-seq.aux} \BKM@entry{id=67,dest={636861707465722E35},srcline={2}}{534D694C452D736571206461746120616E616C79736973} \BKM@entry{id=68,dest={636861707465722E35},srcline={5}}{534D694C452D736571206461746120616E616C79736973} \BKM@entry{id=69,dest={73656374696F6E2E352E31},srcline={19}}{496E74726F64756374696F6E} \pgfsyspdfmark {pgfid19}{0}{40463552} \pgfsyspdfmark {pgfid18}{6}{40511883} \BKM@entry{id=70,dest={73656374696F6E2E352E32},srcline={36}}{48696464656E204D61726B6F76204D6F64656C204D6F74696620646973636F76657279} \BKM@entry{id=71,dest={73656374696F6E2E352E33},srcline={61}}{42696E64696E67206D6F746966206576616C756174696F6E} \BKM@entry{id=72,dest={73656374696F6E2E352E34},srcline={114}}{526573756C7473} \BKM@entry{id=73,dest={73656374696F6E2E352E35},srcline={134}}{436F6E636C7573696F6E73} \@input{main/ch_pwmscan.aux} \BKM@entry{id=74,dest={636861707465722E36},srcline={2}}{50574D5363616E} \BKM@entry{id=75,dest={73656374696F6E2E362E31},srcline={24}}{416C676F726974686D73} \BKM@entry{id=76,dest={73756273656374696F6E2E362E312E31},srcline={28}}{5363616E6E657220616C676F726974686D} \pgfsyspdfmark {pgfid21}{0}{40463552} \pgfsyspdfmark {pgfid20}{6}{40511883} \BKM@entry{id=77,dest={73756273656374696F6E2E362E312E32},srcline={34}}{4D61746368657320656E756D65726174696F6E20616E64206D617070696E67} \BKM@entry{id=78,dest={73656374696F6E2E362E32},srcline={46}}{504D575363616E20617263686974656374757265} \BKM@entry{id=79,dest={73656374696F6E2E362E33},srcline={81}}{42656E63686D61726B} \BKM@entry{id=80,dest={73656374696F6E2E362E34},srcline={153}}{436F6E636C7573696F6E73} \@input{main/ch_atac-seq.aux} \BKM@entry{id=81,dest={636861707465722E37},srcline={2}}{4368726F6D6174696E206163636573736962696C697479206F66206D6F6E6F6379746573} \BKM@entry{id=82,dest={73656374696F6E2E372E31},srcline={9}}{4D6F6E69746F72696E672054462062696E64696E67} \pgfsyspdfmark {pgfid23}{0}{40463552} \pgfsyspdfmark 
{pgfid22}{6}{40511883} \BKM@entry{id=83,dest={73656374696F6E2E372E32},srcline={18}}{54686520616476656E74206F662073696E676C652063656C6C20444746} \BKM@entry{id=84,dest={73656374696F6E2E372E33},srcline={44}}{4F70656E20697373756573} \BKM@entry{id=85,dest={73656374696F6E2E372E34},srcline={48}}{44617461} \BKM@entry{id=86,dest={73656374696F6E2E372E35},srcline={59}}{4964656E74696679696E67206F7665722D726570726573656E746564207369676E616C73} \BKM@entry{id=87,dest={73756273656374696F6E2E372E352E31},srcline={63}}{43684950506172746974696F6E696E6720616C676F726974686D} \BKM@entry{id=88,dest={73756273656374696F6E2E372E352E32},srcline={75}}{454D53657175656E636520616C676F726974686D} \BKM@entry{id=89,dest={73756273656374696F6E2E372E352E33},srcline={175}}{454D4A6F696E7420616C676F726974686D} \BKM@entry{id=90,dest={73756273656374696F6E2E372E352E34},srcline={212}}{44617461207265616C69676E6D656E74} \BKM@entry{id=91,dest={73756273656374696F6E2E372E352E35},srcline={225}}{536F6674206167677265676174696F6E20706C6F7473} \BKM@entry{id=92,dest={73656374696F6E2E372E36},srcline={235}}{446174612070726F63657373696E67} \BKM@entry{id=93,dest={73656374696F6E2E372E37},srcline={245}}{526573756C7473} \BKM@entry{id=94,dest={73756273656374696F6E2E372E372E31},srcline={249}}{416C69676E696E67207468652062696E64696E67207369746573} \BKM@entry{id=95,dest={73756273656374696F6E2E372E372E32},srcline={276}}{4578706C6F72696E6720696E646976696475616C20544620636C6173736573} \BKM@entry{id=96,dest={73656374696F6E2E372E38},srcline={292}}{44697363757373696F6E73} \BKM@entry{id=97,dest={73656374696F6E2E372E39},srcline={302}}{506572737065637469766573} \BKM@entry{id=98,dest={73656374696F6E2E372E3130},srcline={312}}{4D6574686F6473} \BKM@entry{id=99,dest={73756273656374696F6E2E372E31302E31},srcline={314}}{436F646520617661696C6162696C697479} \BKM@entry{id=100,dest={73756273656374696F6E2E372E31302E32},srcline={318}}{4461746120736F7572636573} 
\BKM@entry{id=101,dest={73756273656374696F6E2E372E31302E33},srcline={329}}{4461746120706F73742D70726F63657373696E67} \BKM@entry{id=102,dest={73756273656374696F6E2E372E31302E34},srcline={339}}{4D6F64656C20657874656E73696F6E} \BKM@entry{id=103,dest={73756273656374696F6E2E372E31302E35},srcline={351}}{45787472616374696E6720646174612061737369676E656420746F206120636C617373} \BKM@entry{id=104,dest={73756273656374696F6E2E372E31302E36},srcline={430}}{50726F6772616D73} \BKM@entry{id=105,dest={73756273656374696F6E2E372E31302E37},srcline={460}}{467261676D656E7420636C6173736573} \BKM@entry{id=106,dest={73756273656374696F6E2E372E31302E38},srcline={480}}{53696D756C617465642073657175656E636573} \BKM@entry{id=107,dest={73756273656374696F6E2E372E31302E39},srcline={483}}{42696E64696E6720736974652070726564696374696F6E} \BKM@entry{id=108,dest={73756273656374696F6E2E372E31302E3130},srcline={487}}{5265616C69676E6D656E74207573696E67204A4153504152206D6F74696673} \BKM@entry{id=109,dest={73756273656374696F6E2E372E31302E3131},srcline={551}}{506572205446207375622D636C6173736573} \@input{main/ch_discussion.aux} \BKM@entry{id=110,dest={636861707465722E38},srcline={2}}{44697363757373696F6E} \BKM@entry{id=111,dest={636861707465722E38},srcline={5}}{44697363757373696F6E73} \pgfsyspdfmark {pgfid25}{0}{40463552} \pgfsyspdfmark {pgfid24}{6}{40511883} \@writefile{toc}{\vspace {\normalbaselineskip }} \@input{tail/appendix.aux} \BKM@entry{id=112,dest={617070656E6469782E41},srcline={5}}{537570706C656D656E74617279206D6174657269616C} \BKM@entry{id=113,dest={73656374696F6E2E412E31},srcline={9}}{454E434F4445207065616B7320616E616C7973697320737570706C656D656E74617279206D6174657269616C} \pgfsyspdfmark {pgfid27}{0}{40463552} \pgfsyspdfmark {pgfid26}{-2013849}{40511883} \BKM@entry{id=114,dest={73656374696F6E2E412E32},srcline={102}}{535061722D4B20737570706C656D656E74617279206D6174657269616C} 
\BKM@entry{id=115,dest={73656374696F6E2E412E33},srcline={522}}{534D694C452D73657120737570706C656D656E74617279206D6174657269616C} \BKM@entry{id=116,dest={73656374696F6E2E412E34},srcline={534}}{4368726F6D6174696E206163636573736962696C697479206F66206D6F6E6F637974657320737570706C656D656E74617279206D6174657269616C} \BKM@entry{id=117,dest={73756273656374696F6E2E412E342E31},srcline={536}}{467261676D656E742073697A6520616E616C79736973} \BKM@entry{id=118,dest={73756273656374696F6E2E412E342E32},srcline={563}}{4D6561737572696E67206F70656E206368726F6D6174696E20616E64206E75636C656F736F6D65206F63637570616E6379} \BKM@entry{id=119,dest={73756273656374696F6E2E412E342E33},srcline={594}}{4576616C756174696F6E206F6620454D53657175656E636520616E642043684950506172746974696F6E696E67} \BKM@entry{id=120,dest={73756273656374696F6E2E412E342E34},srcline={694}}{4F7468657220737570706C656D656E746172792066696775726573} \@input{tail/biblio.aux} \BKM@entry{id=121,dest={73656374696F6E2A2E3634},srcline={3}}{4269626C696F677261706879} \pgfsyspdfmark {pgfid29}{0}{40463552} \pgfsyspdfmark {pgfid28}{6}{40498788} \BKM@entry{id=122,dest={617070656E6469782A2E3635},srcline={6}}{4269626C696F677261706879} \@input{tail/cv.aux} \BKM@entry{id=123,dest={73656374696F6E2A2E3636},srcline={4}}{437572726963756C756D205669746165} diff --git a/my_thesis.blg b/my_thesis.blg index ad60a71..88100ab 100644 --- a/my_thesis.blg +++ b/my_thesis.blg @@ -1,63 +1,62 @@ This is BibTeX, Version 0.99d (TeX Live 2017/Debian) Capacity: max_strings=100000, hash_size=100000, hash_prime=85009 The top-level auxiliary file: my_thesis.aux A level-1 auxiliary file: head/dedication.aux A level-1 auxiliary file: head/acknowledgements.aux A level-1 auxiliary file: head/abstracts.aux A level-1 auxiliary file: main/ch_introduction.aux A level-1 auxiliary file: main/ch_lab_resources.aux A level-1 auxiliary file: main/ch_encode_peaks.aux A level-1 auxiliary file: main/ch_spark.aux A level-1 auxiliary file: main/ch_smile-seq.aux A level-1 auxiliary file: 
main/ch_pwmscan.aux A level-1 auxiliary file: main/ch_atac-seq.aux A level-1 auxiliary file: main/ch_discussion.aux A level-1 auxiliary file: tail/appendix.aux A level-1 auxiliary file: tail/biblio.aux The style file: apalike.bst A level-1 auxiliary file: tail/cv.aux Database file #1: tail/bibliography.bib Warning--I didn't find a database entry for "" -Warning--I didn't find a database entry for "nielsen_catchprofiles" You've used 150 entries, 1935 wiz_defined-function locations, - 1236 strings with 45997 characters, + 1235 strings with 45976 characters, and the built_in function-call counts, 86730 in all, are: = -- 7865 > -- 6234 < -- 58 + -- 2349 - -- 2316 * -- 8872 := -- 15368 add.period$ -- 454 call.type$ -- 150 change.case$ -- 1906 chr.to.int$ -- 149 cite$ -- 150 duplicate$ -- 2170 empty$ -- 4530 format.name$ -- 2538 if$ -- 16042 int.to.chr$ -- 2 int.to.str$ -- 0 missing$ -- 151 newline$ -- 752 num.names$ -- 458 pop$ -- 1586 preamble$ -- 1 purify$ -- 1906 quote$ -- 0 skip$ -- 1619 stack$ -- 0 substring$ -- 5452 swap$ -- 211 text.length$ -- 10 text.prefix$ -- 0 top$ -- 0 type$ -- 896 warning$ -- 0 while$ -- 575 width$ -- 0 write$ -- 1960 -(There were 2 warnings) +(There was 1 warning) diff --git a/my_thesis.log b/my_thesis.log index 68ff325..1ea5105 100644 --- a/my_thesis.log +++ b/my_thesis.log @@ -1,4059 +1,4051 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12) 16 JAN 2020 16:14 +This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12) 17 JAN 2020 17:01 entering extended mode restricted \write18 enabled. %&-line parsing enabled. **my_thesis.tex (./my_thesis.tex LaTeX2e <2017-04-15> Babel <3.18> and hyphenation patterns for 84 language(s) loaded. 
(./head/settings_epfl_template.tex (/usr/share/texlive/texmf-dist/tex/latex/base/book.cls Document Class: book 2014/09/29 v1.4h Standard LaTeX document class (/usr/share/texlive/texmf-dist/tex/latex/base/bk11.clo File: bk11.clo 2014/09/29 v1.4h Standard LaTeX file (size option) ) \c@part=\count79 \c@chapter=\count80 \c@section=\count81 \c@subsection=\count82 \c@subsubsection=\count83 \c@paragraph=\count84 \c@subparagraph=\count85 \c@figure=\count86 \c@table=\count87 \abovecaptionskip=\skip41 \belowcaptionskip=\skip42 \bibindent=\dimen102 ) (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty Package: fontenc 2017/04/05 v2.0i Standard LaTeX package (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file LaTeX Font Info: Redeclaring font encoding T1 on input line 48. )) (/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty Package: inputenc 2015/03/17 v1.2c Input encoding file \inpenc@prehook=\toks14 \inpenc@posthook=\toks15 (/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def File: utf8.def 2017/01/28 v1.1t UTF-8 support for inputenc Now handling font encoding OML ... ... no UTF-8 mapping file for font encoding OML Now handling font encoding T1 ... ... 
processing UTF-8 mapping file for font encoding T1 (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu File: t1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A0 (decimal 160) defining Unicode char U+00A1 (decimal 161) defining Unicode char U+00A3 (decimal 163) defining Unicode char U+00AB (decimal 171) defining Unicode char U+00AD (decimal 173) defining Unicode char U+00BB (decimal 187) defining Unicode char U+00BF (decimal 191) defining Unicode char U+00C0 (decimal 192) defining Unicode char U+00C1 (decimal 193) defining Unicode char U+00C2 (decimal 194) defining Unicode char U+00C3 (decimal 195) defining Unicode char U+00C4 (decimal 196) defining Unicode char U+00C5 (decimal 197) defining Unicode char U+00C6 (decimal 198) defining Unicode char U+00C7 (decimal 199) defining Unicode char U+00C8 (decimal 200) defining Unicode char U+00C9 (decimal 201) defining Unicode char U+00CA (decimal 202) defining Unicode char U+00CB (decimal 203) defining Unicode char U+00CC (decimal 204) defining Unicode char U+00CD (decimal 205) defining Unicode char U+00CE (decimal 206) defining Unicode char U+00CF (decimal 207) defining Unicode char U+00D0 (decimal 208) defining Unicode char U+00D1 (decimal 209) defining Unicode char U+00D2 (decimal 210) defining Unicode char U+00D3 (decimal 211) defining Unicode char U+00D4 (decimal 212) defining Unicode char U+00D5 (decimal 213) defining Unicode char U+00D6 (decimal 214) defining Unicode char U+00D8 (decimal 216) defining Unicode char U+00D9 (decimal 217) defining Unicode char U+00DA (decimal 218) defining Unicode char U+00DB (decimal 219) defining Unicode char U+00DC (decimal 220) defining Unicode char U+00DD (decimal 221) defining Unicode char U+00DE (decimal 222) defining Unicode char U+00DF (decimal 223) defining Unicode char U+00E0 (decimal 224) defining Unicode char U+00E1 (decimal 225) defining Unicode char U+00E2 (decimal 226) defining Unicode char U+00E3 (decimal 227) defining Unicode char 
U+00E4 (decimal 228) defining Unicode char U+00E5 (decimal 229) defining Unicode char U+00E6 (decimal 230) defining Unicode char U+00E7 (decimal 231) defining Unicode char U+00E8 (decimal 232) defining Unicode char U+00E9 (decimal 233) defining Unicode char U+00EA (decimal 234) defining Unicode char U+00EB (decimal 235) defining Unicode char U+00EC (decimal 236) defining Unicode char U+00ED (decimal 237) defining Unicode char U+00EE (decimal 238) defining Unicode char U+00EF (decimal 239) defining Unicode char U+00F0 (decimal 240) defining Unicode char U+00F1 (decimal 241) defining Unicode char U+00F2 (decimal 242) defining Unicode char U+00F3 (decimal 243) defining Unicode char U+00F4 (decimal 244) defining Unicode char U+00F5 (decimal 245) defining Unicode char U+00F6 (decimal 246) defining Unicode char U+00F8 (decimal 248) defining Unicode char U+00F9 (decimal 249) defining Unicode char U+00FA (decimal 250) defining Unicode char U+00FB (decimal 251) defining Unicode char U+00FC (decimal 252) defining Unicode char U+00FD (decimal 253) defining Unicode char U+00FE (decimal 254) defining Unicode char U+00FF (decimal 255) defining Unicode char U+0100 (decimal 256) defining Unicode char U+0101 (decimal 257) defining Unicode char U+0102 (decimal 258) defining Unicode char U+0103 (decimal 259) defining Unicode char U+0104 (decimal 260) defining Unicode char U+0105 (decimal 261) defining Unicode char U+0106 (decimal 262) defining Unicode char U+0107 (decimal 263) defining Unicode char U+0108 (decimal 264) defining Unicode char U+0109 (decimal 265) defining Unicode char U+010A (decimal 266) defining Unicode char U+010B (decimal 267) defining Unicode char U+010C (decimal 268) defining Unicode char U+010D (decimal 269) defining Unicode char U+010E (decimal 270) defining Unicode char U+010F (decimal 271) defining Unicode char U+0110 (decimal 272) defining Unicode char U+0111 (decimal 273) defining Unicode char U+0112 (decimal 274) defining Unicode char U+0113 (decimal 275) 
defining Unicode char U+0114 (decimal 276) defining Unicode char U+0115 (decimal 277) defining Unicode char U+0116 (decimal 278) defining Unicode char U+0117 (decimal 279) defining Unicode char U+0118 (decimal 280) defining Unicode char U+0119 (decimal 281) defining Unicode char U+011A (decimal 282) defining Unicode char U+011B (decimal 283) defining Unicode char U+011C (decimal 284) defining Unicode char U+011D (decimal 285) defining Unicode char U+011E (decimal 286) defining Unicode char U+011F (decimal 287) defining Unicode char U+0120 (decimal 288) defining Unicode char U+0121 (decimal 289) defining Unicode char U+0122 (decimal 290) defining Unicode char U+0123 (decimal 291) defining Unicode char U+0124 (decimal 292) defining Unicode char U+0125 (decimal 293) defining Unicode char U+0128 (decimal 296) defining Unicode char U+0129 (decimal 297) defining Unicode char U+012A (decimal 298) defining Unicode char U+012B (decimal 299) defining Unicode char U+012C (decimal 300) defining Unicode char U+012D (decimal 301) defining Unicode char U+012E (decimal 302) defining Unicode char U+012F (decimal 303) defining Unicode char U+0130 (decimal 304) defining Unicode char U+0131 (decimal 305) defining Unicode char U+0132 (decimal 306) defining Unicode char U+0133 (decimal 307) defining Unicode char U+0134 (decimal 308) defining Unicode char U+0135 (decimal 309) defining Unicode char U+0136 (decimal 310) defining Unicode char U+0137 (decimal 311) defining Unicode char U+0139 (decimal 313) defining Unicode char U+013A (decimal 314) defining Unicode char U+013B (decimal 315) defining Unicode char U+013C (decimal 316) defining Unicode char U+013D (decimal 317) defining Unicode char U+013E (decimal 318) defining Unicode char U+0141 (decimal 321) defining Unicode char U+0142 (decimal 322) defining Unicode char U+0143 (decimal 323) defining Unicode char U+0144 (decimal 324) defining Unicode char U+0145 (decimal 325) defining Unicode char U+0146 (decimal 326) defining Unicode char 
U+0147 (decimal 327) defining Unicode char U+0148 (decimal 328) defining Unicode char U+014A (decimal 330) defining Unicode char U+014B (decimal 331) defining Unicode char U+014C (decimal 332) defining Unicode char U+014D (decimal 333) defining Unicode char U+014E (decimal 334) defining Unicode char U+014F (decimal 335) defining Unicode char U+0150 (decimal 336) defining Unicode char U+0151 (decimal 337) defining Unicode char U+0152 (decimal 338) defining Unicode char U+0153 (decimal 339) defining Unicode char U+0154 (decimal 340) defining Unicode char U+0155 (decimal 341) defining Unicode char U+0156 (decimal 342) defining Unicode char U+0157 (decimal 343) defining Unicode char U+0158 (decimal 344) defining Unicode char U+0159 (decimal 345) defining Unicode char U+015A (decimal 346) defining Unicode char U+015B (decimal 347) defining Unicode char U+015C (decimal 348) defining Unicode char U+015D (decimal 349) defining Unicode char U+015E (decimal 350) defining Unicode char U+015F (decimal 351) defining Unicode char U+0160 (decimal 352) defining Unicode char U+0161 (decimal 353) defining Unicode char U+0162 (decimal 354) defining Unicode char U+0163 (decimal 355) defining Unicode char U+0164 (decimal 356) defining Unicode char U+0165 (decimal 357) defining Unicode char U+0168 (decimal 360) defining Unicode char U+0169 (decimal 361) defining Unicode char U+016A (decimal 362) defining Unicode char U+016B (decimal 363) defining Unicode char U+016C (decimal 364) defining Unicode char U+016D (decimal 365) defining Unicode char U+016E (decimal 366) defining Unicode char U+016F (decimal 367) defining Unicode char U+0170 (decimal 368) defining Unicode char U+0171 (decimal 369) defining Unicode char U+0172 (decimal 370) defining Unicode char U+0173 (decimal 371) defining Unicode char U+0174 (decimal 372) defining Unicode char U+0175 (decimal 373) defining Unicode char U+0176 (decimal 374) defining Unicode char U+0177 (decimal 375) defining Unicode char U+0178 (decimal 376) 
defining Unicode char U+0179 (decimal 377) defining Unicode char U+017A (decimal 378) defining Unicode char U+017B (decimal 379) defining Unicode char U+017C (decimal 380) defining Unicode char U+017D (decimal 381) defining Unicode char U+017E (decimal 382) defining Unicode char U+01CD (decimal 461) defining Unicode char U+01CE (decimal 462) defining Unicode char U+01CF (decimal 463) defining Unicode char U+01D0 (decimal 464) defining Unicode char U+01D1 (decimal 465) defining Unicode char U+01D2 (decimal 466) defining Unicode char U+01D3 (decimal 467) defining Unicode char U+01D4 (decimal 468) defining Unicode char U+01E2 (decimal 482) defining Unicode char U+01E3 (decimal 483) defining Unicode char U+01E6 (decimal 486) defining Unicode char U+01E7 (decimal 487) defining Unicode char U+01E8 (decimal 488) defining Unicode char U+01E9 (decimal 489) defining Unicode char U+01EA (decimal 490) defining Unicode char U+01EB (decimal 491) defining Unicode char U+01F0 (decimal 496) defining Unicode char U+01F4 (decimal 500) defining Unicode char U+01F5 (decimal 501) defining Unicode char U+0218 (decimal 536) defining Unicode char U+0219 (decimal 537) defining Unicode char U+021A (decimal 538) defining Unicode char U+021B (decimal 539) defining Unicode char U+0232 (decimal 562) defining Unicode char U+0233 (decimal 563) defining Unicode char U+1E02 (decimal 7682) defining Unicode char U+1E03 (decimal 7683) defining Unicode char U+200C (decimal 8204) defining Unicode char U+2010 (decimal 8208) defining Unicode char U+2011 (decimal 8209) defining Unicode char U+2012 (decimal 8210) defining Unicode char U+2013 (decimal 8211) defining Unicode char U+2014 (decimal 8212) defining Unicode char U+2015 (decimal 8213) defining Unicode char U+2018 (decimal 8216) defining Unicode char U+2019 (decimal 8217) defining Unicode char U+201A (decimal 8218) defining Unicode char U+201C (decimal 8220) defining Unicode char U+201D (decimal 8221) defining Unicode char U+201E (decimal 8222) 
defining Unicode char U+2030 (decimal 8240) defining Unicode char U+2031 (decimal 8241) defining Unicode char U+2039 (decimal 8249) defining Unicode char U+203A (decimal 8250) defining Unicode char U+2423 (decimal 9251) defining Unicode char U+1E20 (decimal 7712) defining Unicode char U+1E21 (decimal 7713) ) Now handling font encoding OT1 ... ... processing UTF-8 mapping file for font encoding OT1 (/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu File: ot1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A0 (decimal 160) defining Unicode char U+00A1 (decimal 161) defining Unicode char U+00A3 (decimal 163) defining Unicode char U+00AD (decimal 173) defining Unicode char U+00B8 (decimal 184) defining Unicode char U+00BF (decimal 191) defining Unicode char U+00C5 (decimal 197) defining Unicode char U+00C6 (decimal 198) defining Unicode char U+00D8 (decimal 216) defining Unicode char U+00DF (decimal 223) defining Unicode char U+00E6 (decimal 230) defining Unicode char U+00EC (decimal 236) defining Unicode char U+00ED (decimal 237) defining Unicode char U+00EE (decimal 238) defining Unicode char U+00EF (decimal 239) defining Unicode char U+00F8 (decimal 248) defining Unicode char U+0131 (decimal 305) defining Unicode char U+0141 (decimal 321) defining Unicode char U+0142 (decimal 322) defining Unicode char U+0152 (decimal 338) defining Unicode char U+0153 (decimal 339) defining Unicode char U+0174 (decimal 372) defining Unicode char U+0175 (decimal 373) defining Unicode char U+0176 (decimal 374) defining Unicode char U+0177 (decimal 375) defining Unicode char U+0218 (decimal 536) defining Unicode char U+0219 (decimal 537) defining Unicode char U+021A (decimal 538) defining Unicode char U+021B (decimal 539) defining Unicode char U+2013 (decimal 8211) defining Unicode char U+2014 (decimal 8212) defining Unicode char U+2018 (decimal 8216) defining Unicode char U+2019 (decimal 8217) defining Unicode char U+201C (decimal 8220) defining 
Unicode char U+201D (decimal 8221) ) Now handling font encoding OMS ... ... processing UTF-8 mapping file for font encoding OMS (/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu File: omsenc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A7 (decimal 167) defining Unicode char U+00B6 (decimal 182) defining Unicode char U+00B7 (decimal 183) defining Unicode char U+2020 (decimal 8224) defining Unicode char U+2021 (decimal 8225) defining Unicode char U+2022 (decimal 8226) ) Now handling font encoding OMX ... ... no UTF-8 mapping file for font encoding OMX Now handling font encoding U ... ... no UTF-8 mapping file for font encoding U defining Unicode char U+00A9 (decimal 169) defining Unicode char U+00AA (decimal 170) defining Unicode char U+00AE (decimal 174) defining Unicode char U+00BA (decimal 186) defining Unicode char U+02C6 (decimal 710) defining Unicode char U+02DC (decimal 732) defining Unicode char U+200C (decimal 8204) defining Unicode char U+2026 (decimal 8230) defining Unicode char U+2122 (decimal 8482) defining Unicode char U+2423 (decimal 9251) )) (/usr/share/texlive/texmf-dist/tex/latex/natbib/natbib.sty Package: natbib 2010/09/13 8.31b (PWD, AO) \bibhang=\skip43 \bibsep=\skip44 LaTeX Info: Redefining \cite on input line 694. 
\c@NAT@ctr=\count88 ) (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.sty Package: babel 2018/02/14 3.18 The Babel package (/usr/share/texlive/texmf-dist/tex/generic/babel/switch.def File: switch.def 2018/02/14 3.18 Babel switching mechanism ) (/usr/share/texlive/texmf-dist/tex/generic/babel-french/french.ldf Language: french 2018/02/04 v3.4b French support from the babel system (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.def File: babel.def 2018/02/14 3.18 Babel common definitions \babel@savecnt=\count89 \U@D=\dimen103 (/usr/share/texlive/texmf-dist/tex/generic/babel/txtbabel.def) \bbl@dirlevel=\count90 ) \l@acadian = a dialect from \language\l@french \FB@nonchar=\count91 Package babel Info: Making : an active character on input line 411. Package babel Info: Making ; an active character on input line 412. Package babel Info: Making ! an active character on input line 413. Package babel Info: Making ? an active character on input line 414. \FBguill@level=\count92 \FB@everypar=\toks16 \FB@Mht=\dimen104 \mc@charclass=\count93 \mc@charfam=\count94 \mc@charslot=\count95 \std@mcc=\count96 \dec@mcc=\count97 \c@FBcaption@count=\count98 \listindentFB=\skip45 \descindentFB=\skip46 \labelwidthFB=\skip47 \leftmarginFB=\skip48 \parindentFFN=\dimen105 \FBfnindent=\skip49 ) (/usr/share/texlive/texmf-dist/tex/generic/babel-german/german.ldf Language: german 2016/11/02 v2.9 German support for babel (traditional orthogra phy) (/usr/share/texlive/texmf-dist/tex/generic/babel-german/germanb.ldf Language: germanb 2016/11/02 v2.9 German support for babel (traditional orthogr aphy) Package babel Info: Making " an active character on input line 139. 
)) (/usr/share/texlive/texmf-dist/tex/generic/babel-english/english.ldf Language: english 2017/06/06 v3.3r English support from the babel system \l@canadian = a dialect from \language\l@american \l@australian = a dialect from \language\l@british \l@newzealand = a dialect from \language\l@british )) (/usr/share/texlive/texmf-dist/tex/latex/carlisle/scalefnt.sty) (/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty Package: keyval 2014/10/28 v1.15 key=value parser (DPC) \KV@toks@=\toks17 ) (/usr/share/texmf/tex/latex/lm/lmodern.sty Package: lmodern 2009/10/30 v1.6 Latin Modern Fonts LaTeX Font Info: Overwriting symbol font `operators' in version `normal' (Font) OT1/cmr/m/n --> OT1/lmr/m/n on input line 22. LaTeX Font Info: Overwriting symbol font `letters' in version `normal' (Font) OML/cmm/m/it --> OML/lmm/m/it on input line 23. LaTeX Font Info: Overwriting symbol font `symbols' in version `normal' (Font) OMS/cmsy/m/n --> OMS/lmsy/m/n on input line 24. LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal' (Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 25. LaTeX Font Info: Overwriting symbol font `operators' in version `bold' (Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 26. LaTeX Font Info: Overwriting symbol font `letters' in version `bold' (Font) OML/cmm/b/it --> OML/lmm/b/it on input line 27. LaTeX Font Info: Overwriting symbol font `symbols' in version `bold' (Font) OMS/cmsy/b/n --> OMS/lmsy/b/n on input line 28. LaTeX Font Info: Overwriting symbol font `largesymbols' in version `bold' (Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 29. LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal' (Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 31. LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal' (Font) OT1/cmss/m/n --> OT1/lmss/m/n on input line 32. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal' (Font) OT1/cmr/m/it --> OT1/lmr/m/it on input line 33. 
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal' (Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 34. LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `bold' (Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 35. LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold' (Font) OT1/cmss/bx/n --> OT1/lmss/bx/n on input line 36. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' (Font) OT1/cmr/bx/it --> OT1/lmr/bx/it on input line 37. LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold' (Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 38. ) (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier.sty Package: fourier 2005/01/01 1.4 fourier-GUTenberg package Now handling font encoding FML ... ... no UTF-8 mapping file for font encoding FML Now handling font encoding FMS ... ... no UTF-8 mapping file for font encoding FMS Now handling font encoding FMX ... ... no UTF-8 mapping file for font encoding FMX (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty Package: fontenc 2017/04/05 v2.0i Standard LaTeX package (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file LaTeX Font Info: Redeclaring font encoding T1 on input line 48. )) (/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty Package: textcomp 2017/04/05 v2.0i Standard LaTeX package Package textcomp Info: Sub-encoding information: (textcomp) 5 = only ISO-Adobe without \textcurrency (textcomp) 4 = 5 + \texteuro (textcomp) 3 = 4 + \textohm (textcomp) 2 = 3 + \textestimated + \textcurrency (textcomp) 1 = TS1 - \textcircled - \t (textcomp) 0 = TS1 (full) (textcomp) Font families with sub-encoding setting implement (textcomp) only a restricted character set as indicated. (textcomp) Family '?' is the default used for unknown fonts. (textcomp) See the documentation for details. Package textcomp Info: Setting ? sub-encoding to TS1/1 on input line 79. 
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file Now handling font encoding TS1 ... ... processing UTF-8 mapping file for font encoding TS1 (/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu File: ts1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A2 (decimal 162) defining Unicode char U+00A3 (decimal 163) defining Unicode char U+00A4 (decimal 164) defining Unicode char U+00A5 (decimal 165) defining Unicode char U+00A6 (decimal 166) defining Unicode char U+00A7 (decimal 167) defining Unicode char U+00A8 (decimal 168) defining Unicode char U+00A9 (decimal 169) defining Unicode char U+00AA (decimal 170) defining Unicode char U+00AC (decimal 172) defining Unicode char U+00AE (decimal 174) defining Unicode char U+00AF (decimal 175) defining Unicode char U+00B0 (decimal 176) defining Unicode char U+00B1 (decimal 177) defining Unicode char U+00B2 (decimal 178) defining Unicode char U+00B3 (decimal 179) defining Unicode char U+00B4 (decimal 180) defining Unicode char U+00B5 (decimal 181) defining Unicode char U+00B6 (decimal 182) defining Unicode char U+00B7 (decimal 183) defining Unicode char U+00B9 (decimal 185) defining Unicode char U+00BA (decimal 186) defining Unicode char U+00BC (decimal 188) defining Unicode char U+00BD (decimal 189) defining Unicode char U+00BE (decimal 190) defining Unicode char U+00D7 (decimal 215) defining Unicode char U+00F7 (decimal 247) defining Unicode char U+0192 (decimal 402) defining Unicode char U+02C7 (decimal 711) defining Unicode char U+02D8 (decimal 728) defining Unicode char U+02DD (decimal 733) defining Unicode char U+0E3F (decimal 3647) defining Unicode char U+2016 (decimal 8214) defining Unicode char U+2020 (decimal 8224) defining Unicode char U+2021 (decimal 8225) defining Unicode char U+2022 (decimal 8226) defining Unicode char U+2030 (decimal 8240) defining Unicode char U+2031 (decimal 8241) defining Unicode char 
U+203B (decimal 8251) defining Unicode char U+203D (decimal 8253) defining Unicode char U+2044 (decimal 8260) defining Unicode char U+204E (decimal 8270) defining Unicode char U+2052 (decimal 8274) defining Unicode char U+20A1 (decimal 8353) defining Unicode char U+20A4 (decimal 8356) defining Unicode char U+20A6 (decimal 8358) defining Unicode char U+20A9 (decimal 8361) defining Unicode char U+20AB (decimal 8363) defining Unicode char U+20AC (decimal 8364) defining Unicode char U+20B1 (decimal 8369) defining Unicode char U+2103 (decimal 8451) defining Unicode char U+2116 (decimal 8470) defining Unicode char U+2117 (decimal 8471) defining Unicode char U+211E (decimal 8478) defining Unicode char U+2120 (decimal 8480) defining Unicode char U+2122 (decimal 8482) defining Unicode char U+2126 (decimal 8486) defining Unicode char U+2127 (decimal 8487) defining Unicode char U+212E (decimal 8494) defining Unicode char U+2190 (decimal 8592) defining Unicode char U+2191 (decimal 8593) defining Unicode char U+2192 (decimal 8594) defining Unicode char U+2193 (decimal 8595) defining Unicode char U+2329 (decimal 9001) defining Unicode char U+232A (decimal 9002) defining Unicode char U+2422 (decimal 9250) defining Unicode char U+25E6 (decimal 9702) defining Unicode char U+25EF (decimal 9711) defining Unicode char U+266A (decimal 9834) )) LaTeX Info: Redefining \oldstylenums on input line 334. Package textcomp Info: Setting cmr sub-encoding to TS1/0 on input line 349. Package textcomp Info: Setting cmss sub-encoding to TS1/0 on input line 350. Package textcomp Info: Setting cmtt sub-encoding to TS1/0 on input line 351. Package textcomp Info: Setting cmvtt sub-encoding to TS1/0 on input line 352. Package textcomp Info: Setting cmbr sub-encoding to TS1/0 on input line 353. Package textcomp Info: Setting cmtl sub-encoding to TS1/0 on input line 354. Package textcomp Info: Setting ccr sub-encoding to TS1/0 on input line 355. 
Package textcomp Info: Setting ptm sub-encoding to TS1/4 on input line 356. Package textcomp Info: Setting pcr sub-encoding to TS1/4 on input line 357. Package textcomp Info: Setting phv sub-encoding to TS1/4 on input line 358. Package textcomp Info: Setting ppl sub-encoding to TS1/3 on input line 359. Package textcomp Info: Setting pag sub-encoding to TS1/4 on input line 360. Package textcomp Info: Setting pbk sub-encoding to TS1/4 on input line 361. Package textcomp Info: Setting pnc sub-encoding to TS1/4 on input line 362. Package textcomp Info: Setting pzc sub-encoding to TS1/4 on input line 363. Package textcomp Info: Setting bch sub-encoding to TS1/4 on input line 364. Package textcomp Info: Setting put sub-encoding to TS1/5 on input line 365. Package textcomp Info: Setting uag sub-encoding to TS1/5 on input line 366. Package textcomp Info: Setting ugq sub-encoding to TS1/5 on input line 367. Package textcomp Info: Setting ul8 sub-encoding to TS1/4 on input line 368. Package textcomp Info: Setting ul9 sub-encoding to TS1/4 on input line 369. Package textcomp Info: Setting augie sub-encoding to TS1/5 on input line 370. Package textcomp Info: Setting dayrom sub-encoding to TS1/3 on input line 371. Package textcomp Info: Setting dayroms sub-encoding to TS1/3 on input line 372. Package textcomp Info: Setting pxr sub-encoding to TS1/0 on input line 373. Package textcomp Info: Setting pxss sub-encoding to TS1/0 on input line 374. Package textcomp Info: Setting pxtt sub-encoding to TS1/0 on input line 375. Package textcomp Info: Setting txr sub-encoding to TS1/0 on input line 376. Package textcomp Info: Setting txss sub-encoding to TS1/0 on input line 377. Package textcomp Info: Setting txtt sub-encoding to TS1/0 on input line 378. Package textcomp Info: Setting lmr sub-encoding to TS1/0 on input line 379. Package textcomp Info: Setting lmdh sub-encoding to TS1/0 on input line 380. Package textcomp Info: Setting lmss sub-encoding to TS1/0 on input line 381. 
Package textcomp Info: Setting lmssq sub-encoding to TS1/0 on input line 382. Package textcomp Info: Setting lmvtt sub-encoding to TS1/0 on input line 383. Package textcomp Info: Setting lmtt sub-encoding to TS1/0 on input line 384. Package textcomp Info: Setting qhv sub-encoding to TS1/0 on input line 385. Package textcomp Info: Setting qag sub-encoding to TS1/0 on input line 386. Package textcomp Info: Setting qbk sub-encoding to TS1/0 on input line 387. Package textcomp Info: Setting qcr sub-encoding to TS1/0 on input line 388. Package textcomp Info: Setting qcs sub-encoding to TS1/0 on input line 389. Package textcomp Info: Setting qpl sub-encoding to TS1/0 on input line 390. Package textcomp Info: Setting qtm sub-encoding to TS1/0 on input line 391. Package textcomp Info: Setting qzc sub-encoding to TS1/0 on input line 392. Package textcomp Info: Setting qhvc sub-encoding to TS1/0 on input line 393. Package textcomp Info: Setting futs sub-encoding to TS1/4 on input line 394. Package textcomp Info: Setting futx sub-encoding to TS1/4 on input line 395. Package textcomp Info: Setting futj sub-encoding to TS1/4 on input line 396. Package textcomp Info: Setting hlh sub-encoding to TS1/3 on input line 397. Package textcomp Info: Setting hls sub-encoding to TS1/3 on input line 398. Package textcomp Info: Setting hlst sub-encoding to TS1/3 on input line 399. Package textcomp Info: Setting hlct sub-encoding to TS1/5 on input line 400. Package textcomp Info: Setting hlx sub-encoding to TS1/5 on input line 401. Package textcomp Info: Setting hlce sub-encoding to TS1/5 on input line 402. Package textcomp Info: Setting hlcn sub-encoding to TS1/5 on input line 403. Package textcomp Info: Setting hlcw sub-encoding to TS1/5 on input line 404. Package textcomp Info: Setting hlcf sub-encoding to TS1/5 on input line 405. Package textcomp Info: Setting pplx sub-encoding to TS1/3 on input line 406. Package textcomp Info: Setting pplj sub-encoding to TS1/3 on input line 407. 
Package textcomp Info: Setting ptmx sub-encoding to TS1/4 on input line 408. Package textcomp Info: Setting ptmj sub-encoding to TS1/4 on input line 409. ) (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier-orns.sty Package: fourier-orns 2004/01/30 1.1 fourier-ornaments package ) LaTeX Font Info: Redeclaring symbol font `operators' on input line 50. LaTeX Font Info: Encoding `OT1' has changed to `T1' for symbol font (Font) `operators' in the math version `normal' on input line 50. LaTeX Font Info: Overwriting symbol font `operators' in version `normal' (Font) OT1/lmr/m/n --> T1/futs/m/n on input line 50. LaTeX Font Info: Encoding `OT1' has changed to `T1' for symbol font (Font) `operators' in the math version `bold' on input line 50. LaTeX Font Info: Overwriting symbol font `operators' in version `bold' (Font) OT1/lmr/bx/n --> T1/futs/m/n on input line 50. LaTeX Font Info: Overwriting symbol font `operators' in version `bold' (Font) T1/futs/m/n --> T1/futs/b/n on input line 51. LaTeX Font Info: Redeclaring symbol font `letters' on input line 59. LaTeX Font Info: Encoding `OML' has changed to `FML' for symbol font (Font) `letters' in the math version `normal' on input line 59. LaTeX Font Info: Overwriting symbol font `letters' in version `normal' (Font) OML/lmm/m/it --> FML/futmi/m/it on input line 59. LaTeX Font Info: Encoding `OML' has changed to `FML' for symbol font (Font) `letters' in the math version `bold' on input line 59. LaTeX Font Info: Overwriting symbol font `letters' in version `bold' (Font) OML/lmm/b/it --> FML/futmi/m/it on input line 59. \symotherletters=\mathgroup4 LaTeX Font Info: Overwriting symbol font `letters' in version `bold' (Font) FML/futmi/m/it --> FML/futmi/b/it on input line 61. LaTeX Font Info: Overwriting symbol font `otherletters' in version `bold' (Font) FML/futm/m/it --> FML/futm/b/it on input line 62. LaTeX Font Info: Redeclaring math symbol \Gamma on input line 63. 
LaTeX Font Info: Redeclaring math symbol \Delta on input line 64. LaTeX Font Info: Redeclaring math symbol \Theta on input line 65. LaTeX Font Info: Redeclaring math symbol \Lambda on input line 66. LaTeX Font Info: Redeclaring math symbol \Xi on input line 67. LaTeX Font Info: Redeclaring math symbol \Pi on input line 68. LaTeX Font Info: Redeclaring math symbol \Sigma on input line 69. LaTeX Font Info: Redeclaring math symbol \Upsilon on input line 70. LaTeX Font Info: Redeclaring math symbol \Phi on input line 71. LaTeX Font Info: Redeclaring math symbol \Psi on input line 72. LaTeX Font Info: Redeclaring math symbol \Omega on input line 73. LaTeX Font Info: Redeclaring symbol font `symbols' on input line 113. LaTeX Font Info: Encoding `OMS' has changed to `FMS' for symbol font (Font) `symbols' in the math version `normal' on input line 113. LaTeX Font Info: Overwriting symbol font `symbols' in version `normal' (Font) OMS/lmsy/m/n --> FMS/futm/m/n on input line 113. LaTeX Font Info: Encoding `OMS' has changed to `FMS' for symbol font (Font) `symbols' in the math version `bold' on input line 113. LaTeX Font Info: Overwriting symbol font `symbols' in version `bold' (Font) OMS/lmsy/b/n --> FMS/futm/m/n on input line 113. LaTeX Font Info: Redeclaring symbol font `largesymbols' on input line 114. LaTeX Font Info: Encoding `OMX' has changed to `FMX' for symbol font (Font) `largesymbols' in the math version `normal' on input line 1 14. LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal' (Font) OMX/lmex/m/n --> FMX/futm/m/n on input line 114. LaTeX Font Info: Encoding `OMX' has changed to `FMX' for symbol font (Font) `largesymbols' in the math version `bold' on input line 114 . LaTeX Font Info: Overwriting symbol font `largesymbols' in version `bold' (Font) OMX/lmex/m/n --> FMX/futm/m/n on input line 114. LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 115. 
LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal' (Font) OT1/lmr/bx/n --> T1/futs/bx/n on input line 115. LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `bold' (Font) OT1/lmr/bx/n --> T1/futs/bx/n on input line 115. LaTeX Font Info: Redeclaring math alphabet \mathrm on input line 116. LaTeX Font Info: Redeclaring math alphabet \mathit on input line 117. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal' (Font) OT1/lmr/m/it --> T1/futs/m/it on input line 117. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' (Font) OT1/lmr/bx/it --> T1/futs/m/it on input line 117. LaTeX Font Info: Redeclaring math alphabet \mathcal on input line 118. LaTeX Font Info: Redeclaring math symbol \parallel on input line 134. LaTeX Font Info: Redeclaring math symbol \hbar on input line 148. LaTeX Font Info: Redeclaring math symbol \varkappa on input line 186. LaTeX Font Info: Redeclaring math symbol \varvarrho on input line 187. LaTeX Font Info: Redeclaring math delimiter \Vert on input line 210. LaTeX Font Info: Redeclaring math delimiter \vert on input line 215. LaTeX Font Info: Redeclaring math delimiter \Downarrow on input line 225. LaTeX Font Info: Redeclaring math delimiter \backslash on input line 227. LaTeX Font Info: Redeclaring math delimiter \rangle on input line 229. LaTeX Font Info: Redeclaring math delimiter \langle on input line 231. LaTeX Font Info: Redeclaring math delimiter \rbrace on input line 233. LaTeX Font Info: Redeclaring math delimiter \lbrace on input line 235. LaTeX Font Info: Redeclaring math delimiter \rceil on input line 237. LaTeX Font Info: Redeclaring math delimiter \lceil on input line 239. LaTeX Font Info: Redeclaring math delimiter \rfloor on input line 241. LaTeX Font Info: Redeclaring math delimiter \lfloor on input line 243. LaTeX Font Info: Redeclaring math accent \acute on input line 247. LaTeX Font Info: Redeclaring math accent \grave on input line 248. 
LaTeX Font Info: Redeclaring math accent \ddot on input line 249. LaTeX Font Info: Redeclaring math accent \tilde on input line 250. LaTeX Font Info: Redeclaring math accent \bar on input line 251. LaTeX Font Info: Redeclaring math accent \breve on input line 252. LaTeX Font Info: Redeclaring math accent \check on input line 253. LaTeX Font Info: Redeclaring math accent \hat on input line 254. LaTeX Font Info: Redeclaring math accent \dot on input line 255. LaTeX Font Info: Redeclaring math accent \mathring on input line 256. \symUfutm=\mathgroup5 ) (/usr/share/texlive/texmf-dist/tex/latex/setspace/setspace.sty Package: setspace 2011/12/19 v6.7a set line spacing ) (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphicx.sty Package: graphicx 2017/06/01 v1.1a Enhanced LaTeX Graphics (DPC,SPQR) (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphics.sty Package: graphics 2017/06/25 v1.2c Standard LaTeX Graphics (DPC,SPQR) (/usr/share/texlive/texmf-dist/tex/latex/graphics/trig.sty Package: trig 2016/01/03 v1.10 sin cos tan (DPC) ) (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/graphics.cfg File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration ) Package graphics Info: Driver file: pdftex.def on input line 99. (/usr/share/texlive/texmf-dist/tex/latex/graphics-def/pdftex.def File: pdftex.def 2018/01/08 v1.0l Graphics/color driver for pdftex )) \Gin@req@height=\dimen106 \Gin@req@width=\dimen107 ) (/usr/share/texlive/texmf-dist/tex/latex/xcolor/xcolor.sty Package: xcolor 2016/05/11 v2.12 LaTeX color extensions (UK) (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/color.cfg File: color.cfg 2016/01/02 v1.6 sample color configuration ) Package xcolor Info: Driver file: pdftex.def on input line 225. Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1348. Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1352. Package xcolor Info: Model `RGB' extended on input line 1364. 
Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1366. Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1367. Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1368. Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1369. Package xcolor Info: Model `Gray' substituted by `gray' on input line 1370. Package xcolor Info: Model `wave' substituted by `hsb' on input line 1371. ) (/usr/share/texlive/texmf-dist/tex/latex/subfig/subfig.sty Package: subfig 2005/06/28 ver: 1.3 subfig package (/usr/share/texlive/texmf-dist/tex/latex/caption/caption.sty Package: caption 2016/02/21 v3.3-144 Customizing captions (AR) (/usr/share/texlive/texmf-dist/tex/latex/caption/caption3.sty Package: caption3 2016/05/22 v1.7-166 caption3 kernel (AR) Package caption3 Info: TeX engine: e-TeX on input line 67. \captionmargin=\dimen108 \captionmargin@=\dimen109 \captionwidth=\dimen110 \caption@tempdima=\dimen111 \caption@indent=\dimen112 \caption@parindent=\dimen113 \caption@hangindent=\dimen114 ) \c@ContinuedFloat=\count99 ) \c@KVtest=\count100 \sf@farskip=\skip50 \sf@captopadj=\dimen115 \sf@capskip=\skip51 \sf@nearskip=\skip52 \c@subfigure=\count101 \c@subfigure@save=\count102 \c@lofdepth=\count103 \c@subtable=\count104 \c@subtable@save=\count105 \c@lotdepth=\count106 \sf@top=\skip53 \sf@bottom=\skip54 ) (/usr/share/texlive/texmf-dist/tex/latex/booktabs/booktabs.sty Package: booktabs 2016/04/27 v1.618033 publication quality tables \heavyrulewidth=\dimen116 \lightrulewidth=\dimen117 \cmidrulewidth=\dimen118 \belowrulesep=\dimen119 \belowbottomsep=\dimen120 \aboverulesep=\dimen121 \abovetopsep=\dimen122 \cmidrulesep=\dimen123 \cmidrulekern=\dimen124 \defaultaddspace=\dimen125 \@cmidla=\count107 \@cmidlb=\count108 \@aboverulesep=\dimen126 \@belowrulesep=\dimen127 \@thisruleclass=\count109 \@lastruleclass=\count110 \@thisrulewidth=\dimen128 ) (/usr/share/texlive/texmf-dist/tex/latex/lipsum/lipsum.sty Package: lipsum 
2014/07/27 v1.3 150 paragraphs of Lorem Ipsum dummy text \c@lips@count=\count111 ) (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.sty Package: microtype 2018/01/14 v2.7a Micro-typographical refinements (RS) \MT@toks=\toks18 \MT@count=\count112 LaTeX Info: Redefining \textls on input line 793. \MT@outer@kern=\dimen129 LaTeX Info: Redefining \textmicrotypecontext on input line 1339. \MT@listname@count=\count113 (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype-pdftex.def File: microtype-pdftex.def 2018/01/14 v2.7a Definitions specific to pdftex (RS) LaTeX Info: Redefining \lsstyle on input line 913. LaTeX Info: Redefining \lslig on input line 913. \MT@outer@space=\skip55 ) Package microtype Info: Loading configuration file microtype.cfg. (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.cfg File: microtype.cfg 2018/01/14 v2.7a microtype main configuration file (RS) )) (/usr/share/texlive/texmf-dist/tex/latex/url/url.sty \Urlmuskip=\muskip10 Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. 
) (/usr/share/texlive/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty Package: fancyhdr 2017/06/30 v3.9a Extensive control of page headers and footer s \f@nch@headwidth=\skip56 \f@nch@O@elh=\skip57 \f@nch@O@erh=\skip58 \f@nch@O@olh=\skip59 \f@nch@O@orh=\skip60 \f@nch@O@elf=\skip61 \f@nch@O@erf=\skip62 \f@nch@O@olf=\skip63 \f@nch@O@orf=\skip64 ) (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.sty \lst@mode=\count114 \lst@gtempboxa=\box26 \lst@token=\toks19 \lst@length=\count115 \lst@currlwidth=\dimen130 \lst@column=\count116 \lst@pos=\count117 \lst@lostspace=\dimen131 \lst@width=\dimen132 \lst@newlines=\count118 \lst@lineno=\count119 \lst@maxwidth=\dimen133 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz) \c@lstnumber=\count120 \lst@skipnumbers=\count121 \lst@framebox=\box27 ) (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.cfg File: listings.cfg 2015/06/04 1.6 listings configuration )) Package: listings 2015/06/04 1.6 (Carsten Heinz) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty File: lstlang1.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty File: lstlang2.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty File: lstlang3.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty File: lstlang1.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty File: lstlang2.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty File: lstlang3.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty File: lstlang1.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty File: lstlang2.sty 2015/06/04 1.6 listings language 
file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty File: lstlang3.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz) ) (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hyperref.sty Package: hyperref 2018/02/06 v6.86b Hypertext links for LaTeX (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-hyperref.sty Package: hobsub-hyperref 2016/05/16 v1.14 Bundle oberdiek, subset hyperref (HO) (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-generic.sty Package: hobsub-generic 2016/05/16 v1.14 Bundle oberdiek, subset generic (HO) Package: hobsub 2016/05/16 v1.14 Construct package bundles (HO) Package: infwarerr 2016/05/16 v1.4 Providing info/warning/error messages (HO) Package: ltxcmds 2016/05/16 v1.23 LaTeX kernel commands for general use (HO) Package: ifluatex 2016/05/16 v1.4 Provides the ifluatex switch (HO) Package ifluatex Info: LuaTeX not detected. Package: ifvtex 2016/05/16 v1.6 Detect VTeX and its facilities (HO) Package ifvtex Info: VTeX not detected. Package: intcalc 2016/05/16 v1.2 Expandable calculations with integers (HO) Package: ifpdf 2017/03/15 v3.2 Provides the ifpdf switch Package: etexcmds 2016/05/16 v1.6 Avoid name clashes with e-TeX commands (HO) Package etexcmds Info: Could not find \expanded. (etexcmds) That can mean that you are not using pdfTeX 1.50 or (etexcmds) that some package has redefined \expanded. (etexcmds) In the latter case, load this package earlier. Package: kvsetkeys 2016/05/16 v1.17 Key value parser (HO) Package: kvdefinekeys 2016/05/16 v1.4 Define keys (HO) Package: pdftexcmds 2018/01/21 v0.26 Utility functions of pdfTeX for LuaTeX (HO ) Package pdftexcmds Info: LuaTeX not detected. Package pdftexcmds Info: \pdf@primitive is available. Package pdftexcmds Info: \pdf@ifprimitive is available. Package pdftexcmds Info: \pdfdraftmode found. 
Package: pdfescape 2016/05/16 v1.14 Implements pdfTeX's escape features (HO) Package: bigintcalc 2016/05/16 v1.4 Expandable calculations on big integers (HO ) Package: bitset 2016/05/16 v1.2 Handle bit-vector datatype (HO) Package: uniquecounter 2016/05/16 v1.3 Provide unlimited unique counter (HO) ) Package hobsub Info: Skipping package `hobsub' (already loaded). Package: letltxmacro 2016/05/16 v1.5 Let assignment for LaTeX macros (HO) Package: hopatch 2016/05/16 v1.3 Wrapper for package hooks (HO) Package: xcolor-patch 2016/05/16 xcolor patch Package: atveryend 2016/05/16 v1.9 Hooks at the very end of document (HO) Package atveryend Info: \enddocument detected (standard20110627). Package: atbegshi 2016/06/09 v1.18 At begin shipout hook (HO) Package: refcount 2016/05/16 v3.5 Data extraction from label references (HO) Package: hycolor 2016/05/16 v1.8 Color options for hyperref/bookmark (HO) ) (/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/auxhook.sty Package: auxhook 2016/05/16 v1.4 Hooks for auxiliary files (HO) ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/kvoptions.sty Package: kvoptions 2016/05/16 v3.12 Key value format for package options (HO) ) \@linkdim=\dimen134 \Hy@linkcounter=\count122 \Hy@pagecounter=\count123 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/pd1enc.def File: pd1enc.def 2018/02/06 v6.86b Hyperref: PDFDocEncoding definition (HO) Now handling font encoding PD1 ... ... no UTF-8 mapping file for font encoding PD1 ) \Hy@SavedSpaceFactor=\count124 (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/hyperref.cfg File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive ) Package hyperref Info: Hyper figures OFF on input line 4509. Package hyperref Info: Link nesting OFF on input line 4514. Package hyperref Info: Hyper index ON on input line 4517. 
Package hyperref Info: Plain pages OFF on input line 4524. Package hyperref Info: Backreferencing OFF on input line 4529. Package hyperref Info: Implicit mode ON; LaTeX internals redefined. Package hyperref Info: Bookmarks ON on input line 4762. \c@Hy@tempcnt=\count125 LaTeX Info: Redefining \url on input line 5115. \XeTeXLinkMargin=\dimen135 \Fld@menulength=\count126 \Field@Width=\dimen136 \Fld@charsize=\dimen137 Package hyperref Info: Hyper figures OFF on input line 6369. Package hyperref Info: Link nesting OFF on input line 6374. Package hyperref Info: Hyper index ON on input line 6377. Package hyperref Info: backreferencing OFF on input line 6384. Package hyperref Info: Link coloring OFF on input line 6389. Package hyperref Info: Link coloring with OCG OFF on input line 6394. Package hyperref Info: PDF/A mode OFF on input line 6399. LaTeX Info: Redefining \ref on input line 6439. LaTeX Info: Redefining \pageref on input line 6443. \Hy@abspage=\count127 \c@Item=\count128 \c@Hfootnote=\count129 ) Package hyperref Info: Driver (autodetected): hpdftex. (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hpdftex.def File: hpdftex.def 2018/02/06 v6.86b Hyperref driver for pdfTeX \Fld@listcount=\count130 \c@bookmark@seq@number=\count131 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty Package: rerunfilecheck 2016/05/16 v1.8 Rerun checks for auxiliary files (HO) Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2 82. ) \Hy@SectionHShift=\skip65 ) Package hyperref Info: Option `colorlinks' set `true' on input line 105. 
(/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pdfpages.sty Package: pdfpages 2017/10/31 v0.5l Insert pages of external PDF documents (AM) (/usr/share/texlive/texmf-dist/tex/latex/base/ifthen.sty Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC) ) (/usr/share/texlive/texmf-dist/tex/latex/tools/calc.sty Package: calc 2014/10/28 v4.3 Infix arithmetic (KKT,FJ) \calc@Acount=\count132 \calc@Bcount=\count133 \calc@Adimen=\dimen138 \calc@Bdimen=\dimen139 \calc@Askip=\skip66 \calc@Bskip=\skip67 LaTeX Info: Redefining \setlength on input line 80. LaTeX Info: Redefining \addtolength on input line 81. \calc@Ccount=\count134 \calc@Cskip=\skip68 ) (/usr/share/texlive/texmf-dist/tex/latex/eso-pic/eso-pic.sty Package: eso-pic 2015/07/21 v2.0g eso-pic (RN) ) \AM@pagewidth=\dimen140 \AM@pageheight=\dimen141 (/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pppdftex.def File: pppdftex.def 2017/10/31 v0.5l Pdfpages driver for pdfTeX (AM) ) \AM@pagebox=\box28 \AM@global@opts=\toks20 \AM@toc@title=\toks21 \c@AM@survey=\count135 \AM@templatesizebox=\box29 ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bookmark.sty Package: bookmark 2016/05/17 v1.26 PDF bookmarks (HO) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bkm-pdftex.def File: bkm-pdftex.def 2016/05/17 v1.26 bookmark driver for pdfTeX (HO) \BKM@id=\count136 )) (/usr/share/texlive/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex \pgfutil@everybye=\toks22 \pgfutil@tempdima=\dimen142 \pgfutil@tempdimb=\dimen143 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common-lists.t ex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def \pgfutil@abb=\box30 (/usr/share/texlive/texmf-dist/tex/latex/ms/everyshi.sty Package: everyshi 2001/05/15 v3.00 EveryShipout Package 
(MS) )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex Package: pgfrcs 2015/08/07 v3.0.1a (rcs-revision 1.31) )) Package: pgf 2015/08/07 v3.0.1a (rcs-revision 1.15) (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex Package: pgfsys 2014/07/09 v3.0.1a (rcs-revision 1.48) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex \pgfkeys@pathtoks=\toks23 \pgfkeys@temptoks=\toks24 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeysfiltered.code.t ex \pgfkeys@tmptoks=\toks25 )) \pgf@x=\dimen144 \pgf@y=\dimen145 \pgf@xa=\dimen146 \pgf@ya=\dimen147 \pgf@xb=\dimen148 \pgf@yb=\dimen149 \pgf@xc=\dimen150 \pgf@yc=\dimen151 \w@pgf@writea=\write3 \r@pgf@reada=\read1 \c@pgf@counta=\count137 \c@pgf@countb=\count138 \c@pgf@countc=\count139 \c@pgf@countd=\count140 \t@pgf@toka=\toks26 \t@pgf@tokb=\toks27 \t@pgf@tokc=\toks28 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg File: pgf.cfg 2008/05/14 (rcs-revision 1.7) ) Driver file for pgf: pgfsys-pdftex.def (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def File: pgfsys-pdftex.def 2014/10/11 (rcs-revision 1.35) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.de f File: pgfsys-common-pdf.def 2013/10/10 (rcs-revision 1.13) ))) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code. tex File: pgfsyssoftpath.code.tex 2013/09/09 (rcs-revision 1.9) \pgfsyssoftpath@smallbuffer@items=\count141 \pgfsyssoftpath@bigbuffer@items=\count142 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code. 
tex File: pgfsysprotocol.code.tex 2006/10/16 (rcs-revision 1.4) )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex Package: pgfcore 2010/04/11 v3.0.1a (rcs-revision 1.7) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex \pgfmath@dimen=\dimen152 \pgfmath@count=\count143 \pgfmath@box=\box31 \pgfmath@toks=\toks29 \pgfmath@stack@operand=\toks30 \pgfmath@stack@operation=\toks31 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code .tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonomet ric.code.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.cod e.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison .code.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code. tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code .tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code. 
tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerari thmetics.code.tex))) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex \c@pgfmathroundto@lastzeros=\count144 )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.te x File: pgfcorepoints.code.tex 2013/10/07 (rcs-revision 1.27) \pgf@picminx=\dimen153 \pgf@picmaxx=\dimen154 \pgf@picminy=\dimen155 \pgf@picmaxy=\dimen156 \pgf@pathminx=\dimen157 \pgf@pathmaxx=\dimen158 \pgf@pathminy=\dimen159 \pgf@pathmaxy=\dimen160 \pgf@xx=\dimen161 \pgf@xy=\dimen162 \pgf@yx=\dimen163 \pgf@yy=\dimen164 \pgf@zx=\dimen165 \pgf@zy=\dimen166 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct. code.tex File: pgfcorepathconstruct.code.tex 2013/10/07 (rcs-revision 1.29) \pgf@path@lastx=\dimen167 \pgf@path@lasty=\dimen168 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code .tex File: pgfcorepathusage.code.tex 2014/11/02 (rcs-revision 1.24) \pgf@shorten@end@additional=\dimen169 \pgf@shorten@start@additional=\dimen170 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.te x File: pgfcorescopes.code.tex 2015/05/08 (rcs-revision 1.46) \pgfpic=\box32 \pgf@hbox=\box33 \pgf@layerbox@main=\box34 \pgf@picture@serial@count=\count145 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.c ode.tex File: pgfcoregraphicstate.code.tex 2014/11/02 (rcs-revision 1.12) \pgflinewidth=\dimen171 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformation s.code.tex File: pgfcoretransformations.code.tex 2015/08/07 (rcs-revision 1.20) \pgf@pt@x=\dimen172 \pgf@pt@y=\dimen173 \pgf@pt@temp=\dimen174 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex File: pgfcorequick.code.tex 2008/10/09 (rcs-revision 1.3) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.t ex File: pgfcoreobjects.code.tex 
2006/10/11 (rcs-revision 1.2) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing .code.tex File: pgfcorepathprocessing.code.tex 2013/09/09 (rcs-revision 1.9) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.te x File: pgfcorearrows.code.tex 2015/05/14 (rcs-revision 1.43) \pgfarrowsep=\dimen175 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex File: pgfcoreshade.code.tex 2013/07/15 (rcs-revision 1.15) \pgf@max=\dimen176 \pgf@sys@shading@range@num=\count146 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex File: pgfcoreimage.code.tex 2013/07/15 (rcs-revision 1.18) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code. tex File: pgfcoreexternal.code.tex 2014/07/09 (rcs-revision 1.21) \pgfexternal@startupbox=\box35 )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.te x File: pgfcorelayers.code.tex 2013/07/18 (rcs-revision 1.7) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.c ode.tex File: pgfcoretransparency.code.tex 2013/09/30 (rcs-revision 1.5) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code. 
tex File: pgfcorepatterns.code.tex 2013/11/07 (rcs-revision 1.5) ))) (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex File: pgfmoduleshapes.code.tex 2014/03/21 (rcs-revision 1.35) \pgfnodeparttextbox=\box36 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex File: pgfmoduleplot.code.tex 2015/08/03 (rcs-revision 1.13) ) (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65 .sty Package: pgfcomp-version-0-65 2007/07/03 v3.0.1a (rcs-revision 1.7) \pgf@nodesepstart=\dimen177 \pgf@nodesepend=\dimen178 ) (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18 .sty Package: pgfcomp-version-1-18 2007/07/23 v3.0.1a (rcs-revision 1.1) )) (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (/usr/share/texlive/texmf-dist/tex/latex/pgf/math/pgfmath.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex Package: pgffor 2013/12/13 v3.0.1a (rcs-revision 1.25) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex) \pgffor@iter=\dimen179 \pgffor@skip=\dimen180 \pgffor@stack=\toks32 \pgffor@toks=\toks33 )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex Package: tikz 2015/08/07 v3.0.1a (rcs-revision 1.151) (/usr/share/texlive/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers .code.tex File: pgflibraryplothandlers.code.tex 2013/08/31 v3.0.1a (rcs-revision 1.20) \pgf@plot@mark@count=\count147 \pgfplotmarksize=\dimen181 ) \tikz@lastx=\dimen182 \tikz@lasty=\dimen183 \tikz@lastxsaved=\dimen184 \tikz@lastysaved=\dimen185 \tikzleveldistance=\dimen186 \tikzsiblingdistance=\dimen187 \tikz@figbox=\box37 \tikz@figbox@bg=\box38 \tikz@tempbox=\box39 \tikz@tempbox@bg=\box40 
\tikztreelevel=\count148 \tikznumberofchildren=\count149 \tikznumberofcurrentchild=\count150 \tikz@fig@count=\count151 (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex File: pgfmodulematrix.code.tex 2013/09/17 (rcs-revision 1.8) \pgfmatrixcurrentrow=\count152 \pgfmatrixcurrentcolumn=\count153 \pgf@matrix@numberofcolumns=\count154 ) \tikz@expandcount=\count155 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik zlibrarytopaths.code.tex File: tikzlibrarytopaths.code.tex 2008/06/17 v3.0.1a (rcs-revision 1.2) ))) (/usr/share/texlive/texmf-dist/tex/latex/titlesec/titlesec.sty Package: titlesec 2016/03/21 v2.10.2 Sectioning titles \ttl@box=\box41 \beforetitleunit=\skip69 \aftertitleunit=\skip70 \ttl@plus=\dimen188 \ttl@minus=\dimen189 \ttl@toksa=\toks34 \titlewidth=\dimen190 \titlewidthlast=\dimen191 \titlewidthfirst=\dimen192 ) (/usr/share/texlive/texmf-dist/tex/latex/titlesec/ttlkeys.def File: ttlkeys.def 2016/03/15 \c@ttlp@side=\count156 \ttlp@side=\count157 ) \c@myparts=\count158 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty Package: amsmath 2017/09/02 v2.17a AMS math features \@mathmargin=\skip71 For additional information on amsmath, use the `?' option. (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty Package: amstext 2000/06/29 v2.01 AMS text (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty File: amsgen.sty 1999/11/30 v2.0 generic functions \@emptytoks=\toks35 \ex@=\dimen193 )) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty Package: amsbsy 1999/11/29 v1.2d Bold Symbols \pmbraise@=\dimen194 ) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty Package: amsopn 2016/03/08 v2.02 operator names ) \inf@bad=\count159 LaTeX Info: Redefining \frac on input line 213. \uproot@=\count160 \leftroot@=\count161 LaTeX Info: Redefining \overline on input line 375. \classnum@=\count162 \DOTSCASE@=\count163 LaTeX Info: Redefining \ldots on input line 472. 
LaTeX Info: Redefining \dots on input line 475. LaTeX Info: Redefining \cdots on input line 596. \Mathstrutbox@=\box42 \strutbox@=\box43 \big@size=\dimen195 LaTeX Font Info: Redeclaring font encoding OML on input line 712. LaTeX Font Info: Redeclaring font encoding OMS on input line 713. \macc@depth=\count164 \c@MaxMatrixCols=\count165 \dotsspace@=\muskip11 \c@parentequation=\count166 \dspbrk@lvl=\count167 \tag@help=\toks36 \row@=\count168 \column@=\count169 \maxfields@=\count170 \andhelp@=\toks37 \eqnshift@=\dimen196 \alignsep@=\dimen197 \tagshift@=\dimen198 \tagwidth@=\dimen199 \totwidth@=\dimen256 \lineht@=\dimen257 \@envbody=\toks38 \multlinegap=\skip72 \multlinetaggap=\skip73 \mathdisplay@stack=\toks39 LaTeX Info: Redefining \[ on input line 2817. LaTeX Info: Redefining \] on input line 2818. ) (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amsfonts.sty Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support \symAMSa=\mathgroup6 \symAMSb=\mathgroup7 LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' (Font) U/euf/m/n --> U/euf/b/n on input line 106. LaTeX Font Info: Redeclaring math symbol \square on input line 141. ) (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amssymb.sty Package: amssymb 2013/01/14 v3.01 AMS font symbols LaTeX Font Info: Redeclaring math symbol \blacksquare on input line 48. LaTeX Font Info: Redeclaring math symbol \vDash on input line 60. LaTeX Font Info: Redeclaring math symbol \leftleftarrows on input line 63. LaTeX Font Info: Redeclaring math symbol \rightrightarrows on input line 64. LaTeX Font Info: Redeclaring math symbol \leqslant on input line 101. LaTeX Font Info: Redeclaring math symbol \geqslant on input line 108. LaTeX Font Info: Redeclaring math symbol \blacktriangleright on input line 1 20. LaTeX Font Info: Redeclaring math symbol \blacktriangleleft on input line 12 1. LaTeX Font Info: Redeclaring math symbol \complement on input line 165. 
LaTeX Font Info: Redeclaring math symbol \intercal on input line 166. LaTeX Font Info: Redeclaring math symbol \nleqslant on input line 181. LaTeX Font Info: Redeclaring math symbol \ngeqslant on input line 182. LaTeX Font Info: Redeclaring math symbol \varsubsetneq on input line 203. LaTeX Font Info: Redeclaring math symbol \subsetneqq on input line 207. LaTeX Font Info: Redeclaring math symbol \nparallel on input line 215. LaTeX Font Info: Redeclaring math symbol \nvDash on input line 221. LaTeX Font Info: Redeclaring math symbol \nexists on input line 235. LaTeX Font Info: Redeclaring math symbol \smallsetminus on input line 251. LaTeX Font Info: Redeclaring math symbol \curvearrowleft on input line 257. LaTeX Font Info: Redeclaring math symbol \curvearrowright on input line 258. LaTeX Font Info: Redeclaring math symbol \varkappa on input line 260. LaTeX Font Info: Redeclaring math symbol \hslash on input line 262. ) (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mathtools.sty Package: mathtools 2018/01/08 v1.21 mathematical typesetting tools (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mhsetup.sty Package: mhsetup 2017/03/31 v1.3 programming setup (MH) ) LaTeX Info: Thecontrolsequence`\('isalreadyrobust on input line 129. LaTeX Info: Thecontrolsequence`\)'isalreadyrobust on input line 129. LaTeX Info: Thecontrolsequence`\['isalreadyrobust on input line 129. LaTeX Info: Thecontrolsequence`\]'isalreadyrobust on input line 129. 
\g_MT_multlinerow_int=\count171 \l_MT_multwidth_dim=\dimen258 \origjot=\skip74 \l_MT_shortvdotswithinadjustabove_dim=\dimen259 \l_MT_shortvdotswithinadjustbelow_dim=\dimen260 \l_MT_above_intertext_sep=\dimen261 \l_MT_below_intertext_sep=\dimen262 \l_MT_above_shortintertext_sep=\dimen263 \l_MT_below_shortintertext_sep=\dimen264 )) (./head/settings_custom.tex (/usr/share/texlive/texmf-dist/tex/latex/algorithm2e/algorithm2e.sty Package: algorithm2e 2017/07/18 v5.2 algorithms environments \c@AlgoLine=\count172 \algocf@hangindent=\skip75 (/usr/share/texlive/texmf-dist/tex/latex/ifoddpage/ifoddpage.sty Package: ifoddpage 2016/04/23 v1.1 Conditionals for odd/even page detection \c@checkoddpage=\count173 ) (/usr/share/texlive/texmf-dist/tex/latex/tools/xspace.sty Package: xspace 2014/10/28 v1.13 Space after command names (DPC,MH) ) (/usr/share/texlive/texmf-dist/tex/latex/relsize/relsize.sty Package: relsize 2013/03/29 ver 4.1 ) \skiptotal=\skip76 \skiplinenumber=\skip77 \skiprule=\skip78 \skiphlne=\skip79 \skiptext=\skip80 \skiplength=\skip81 \algomargin=\skip82 \skipalgocfslide=\skip83 \algowidth=\dimen265 \inoutsize=\dimen266 \inoutindent=\dimen267 \interspacetitleruled=\dimen268 \interspacealgoruled=\dimen269 \interspacetitleboxruled=\dimen270 \algocf@ruledwidth=\skip84 \algocf@inoutbox=\box44 \algocf@inputbox=\box45 \AlCapSkip=\skip85 \AlCapHSkip=\skip86 \algoskipindent=\skip87 \algocf@nlbox=\box46 \algocf@hangingbox=\box47 \algocf@untilbox=\box48 \algocf@skipuntil=\skip88 \algocf@capbox=\box49 \algocf@lcaptionbox=\skip89 \algoheightruledefault=\skip90 \algoheightrule=\skip91 \algotitleheightruledefault=\skip92 \algotitleheightrule=\skip93 \c@algocfline=\count174 \c@algocfproc=\count175 \c@algocf=\count176 \algocf@algoframe=\box50 \algocf@algobox=\box51 ) (/usr/share/texlive/texmf-dist/tex/latex/float/float.sty Package: float 2001/11/08 v1.3d Float enhancements (AL) \c@float@type=\count177 \float@exts=\toks40 \float@box=\box52 \@float@everytoks=\toks41 
\@floatcapt=\box53 ) (/usr/share/texlive/texmf-dist/tex/latex/hyperref/xr-hyper.sty Package: xr-hyper 2000/03/22 v6.00beta4 eXternal References (DPC) Package xr-hyper Warning: Load package `hyperref' after `xr-hyper'. ) (/usr/share/texlive/texmf-dist/tex/latex/makecell/makecell.sty Package: makecell 2009/08/03 V0.1e Managing of Tab Column Heads and Cells (/usr/share/texlive/texmf-dist/tex/latex/tools/array.sty Package: array 2016/10/06 v2.4d Tabular extension package (FMi) \col@sep=\dimen271 \extrarowheight=\dimen272 \NC@list=\toks42 \extratabsurround=\skip94 \backup@length=\skip95 ) \rotheadsize=\dimen273 \c@nlinenum=\count178 \TeXr@lab=\toks43 ) (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.sty (/usr/share/texlive/texmf-dist/tex/latex/l3kernel/expl3.sty Package: expl3 2018/02/21 L3 programming layer (loader) (/usr/share/texlive/texmf-dist/tex/latex/l3kernel/expl3-code.tex Package: expl3 2018/02/21 L3 programming layer (code) \c_max_int=\count179 \l_tmpa_int=\count180 \l_tmpb_int=\count181 \g_tmpa_int=\count182 \g_tmpb_int=\count183 \g__intarray_font_int=\count184 \g__prg_map_int=\count185 \c_log_iow=\count186 \l_iow_line_count_int=\count187 \l__iow_line_target_int=\count188 \l__iow_one_indent_int=\count189 \l__iow_indent_int=\count190 \c_zero_dim=\dimen274 \c_max_dim=\dimen275 \l_tmpa_dim=\dimen276 \l_tmpb_dim=\dimen277 \g_tmpa_dim=\dimen278 \g_tmpb_dim=\dimen279 \c_zero_skip=\skip96 \c_max_skip=\skip97 \l_tmpa_skip=\skip98 \l_tmpb_skip=\skip99 \g_tmpa_skip=\skip100 \g_tmpb_skip=\skip101 \c_zero_muskip=\muskip12 \c_max_muskip=\muskip13 \l_tmpa_muskip=\muskip14 \l_tmpb_muskip=\muskip15 \g_tmpa_muskip=\muskip16 \g_tmpb_muskip=\muskip17 \l_keys_choice_int=\count191 \c__fp_leading_shift_int=\count192 \c__fp_middle_shift_int=\count193 \c__fp_trailing_shift_int=\count194 \c__fp_big_leading_shift_int=\count195 \c__fp_big_middle_shift_int=\count196 \c__fp_big_trailing_shift_int=\count197 \c__fp_Bigg_leading_shift_int=\count198 
\c__fp_Bigg_middle_shift_int=\count199 \c__fp_Bigg_trailing_shift_int=\count266 \c__fp_rand_size_int=\count267 \c__fp_rand_four_int=\count268 \c__fp_rand_eight_int=\count269 \l__sort_length_int=\count270 \l__sort_min_int=\count271 \l__sort_top_int=\count272 \l__sort_max_int=\count273 \l__sort_true_max_int=\count274 \l__sort_block_int=\count275 \l__sort_begin_int=\count276 \l__sort_end_int=\count277 \l__sort_A_int=\count278 \l__sort_B_int=\count279 \l__sort_C_int=\count280 \l__tl_build_start_index_int=\count281 \l__tl_build_index_int=\count282 \l__tl_analysis_normal_int=\count283 \l__tl_analysis_index_int=\count284 \l__tl_analysis_nesting_int=\count285 \l__tl_analysis_type_int=\count286 \l__regex_internal_a_int=\count287 \l__regex_internal_b_int=\count288 \l__regex_internal_c_int=\count289 \l__regex_balance_int=\count290 \l__regex_group_level_int=\count291 \l__regex_mode_int=\count292 \c__regex_cs_in_class_mode_int=\count293 \c__regex_cs_mode_int=\count294 \l__regex_catcodes_int=\count295 \l__regex_default_catcodes_int=\count296 \c__regex_catcode_D_int=\count297 \c__regex_catcode_S_int=\count298 \c__regex_catcode_L_int=\count299 \c__regex_catcode_O_int=\count300 \c__regex_catcode_A_int=\count301 \c__regex_all_catcodes_int=\count302 \l__regex_show_lines_int=\count303 \l__regex_min_state_int=\count304 \l__regex_max_state_int=\count305 \l__regex_left_state_int=\count306 \l__regex_right_state_int=\count307 \l__regex_capturing_group_int=\count308 \l__regex_min_pos_int=\count309 \l__regex_max_pos_int=\count310 \l__regex_curr_pos_int=\count311 \l__regex_start_pos_int=\count312 \l__regex_success_pos_int=\count313 \l__regex_curr_char_int=\count314 \l__regex_curr_catcode_int=\count315 \l__regex_last_char_int=\count316 \l__regex_case_changed_char_int=\count317 \l__regex_curr_state_int=\count318 \l__regex_step_int=\count319 \l__regex_min_active_int=\count320 \l__regex_max_active_int=\count321 \l__regex_replacement_csnames_int=\count322 \l__regex_match_count_int=\count323 
\l__regex_min_submatch_int=\count324 \l__regex_submatch_int=\count325 \l__regex_zeroth_submatch_int=\count326 \g__regex_trace_regex_int=\count327 \c_empty_box=\box54 \l_tmpa_box=\box55 \l_tmpb_box=\box56 \g_tmpa_box=\box57 \g_tmpb_box=\box58 \l__box_top_dim=\dimen280 \l__box_bottom_dim=\dimen281 \l__box_left_dim=\dimen282 \l__box_right_dim=\dimen283 \l__box_top_new_dim=\dimen284 \l__box_bottom_new_dim=\dimen285 \l__box_left_new_dim=\dimen286 \l__box_right_new_dim=\dimen287 \l__box_internal_box=\box59 \l__coffin_internal_box=\box60 \l__coffin_internal_dim=\dimen288 \l__coffin_offset_x_dim=\dimen289 \l__coffin_offset_y_dim=\dimen290 \l__coffin_x_dim=\dimen291 \l__coffin_y_dim=\dimen292 \l__coffin_x_prime_dim=\dimen293 \l__coffin_y_prime_dim=\dimen294 \c_empty_coffin=\box61 \l__coffin_aligned_coffin=\box62 \l__coffin_aligned_internal_coffin=\box63 \l_tmpa_coffin=\box64 \l_tmpb_coffin=\box65 \l__coffin_display_coffin=\box66 \l__coffin_display_coord_coffin=\box67 \l__coffin_display_pole_coffin=\box68 \l__coffin_display_offset_dim=\dimen295 \l__coffin_display_x_dim=\dimen296 \l__coffin_display_y_dim=\dimen297 \l__coffin_bounding_shift_dim=\dimen298 \l__coffin_left_corner_dim=\dimen299 \l__coffin_right_corner_dim=\dimen300 \l__coffin_bottom_corner_dim=\dimen301 \l__coffin_top_corner_dim=\dimen302 \l__coffin_scaled_total_height_dim=\dimen303 \l__coffin_scaled_width_dim=\dimen304 ) (/usr/share/texlive/texmf-dist/tex/latex/l3kernel/l3pdfmode.def File: l3pdfmode.def 2017/03/18 v L3 Experimental driver: PDF mode \l__driver_color_stack_int=\count328 \l__driver_tmp_box=\box69 )) (/usr/share/texlive/texmf-dist/tex/latex/l3packages/xparse/xparse.sty Package: xparse 2018/02/21 L3 Experimental document command parser \l__xparse_current_arg_int=\count329 \g__xparse_grabber_int=\count330 \l__xparse_m_args_int=\count331 \l__xparse_mandatory_args_int=\count332 \l__xparse_v_nesting_int=\count333 ) (/usr/share/texlive/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty Package: l3keys2e 
2018/02/21 LaTeX2e option processing using LaTeX3 keys ) Package: chemmacros 2017/08/28 v5.8b comprehensive support for typesetting chem istry documents (CN) ................................................. . LaTeX info: "xparse/define-command" . . Defining command \IfChemCompatibilityTF with sig. 'mm+m+m' on line 190. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \IfChemCompatibilityT with sig. 'mm+m' on line 193. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \IfChemCompatibilityF with sig. 'mm+m' on line 196. ................................................. (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros5.sty Package: chemmacros5 2017/08/28 v5.8b comprehensive support for typesetting che mistry documents (CN) \l__chemmacros_tmpa_dim=\dimen305 \l__chemmacros_tmpb_dim=\dimen306 \l__chemmacros_tmpc_dim=\dimen307 \l__chemmacros_tmpa_int=\count334 \l__chemmacros_tmpb_int=\count335 \l__chemmacros_tmpc_int=\count336 \l__chemmacros_tmpa_box=\box70 \l__chemmacros_tmpb_box=\box71 \l__chemmacros_tmpc_box=\box72 ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ChemModule with sig. 'smmO{5.0}' on line 258. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \usechemmodule with sig. 'm' on line 262. ................................................. \g__file_internal_ior=\read2 (chemmacros) Loading module `base'... 
(/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.base.code .tex File: chemmacros.module.base.code.tex 2017/08/28 v5.8b chemmacros module `base' 2017/08/28 basic chemmacros module (/usr/share/texlive/texmf-dist/tex/latex/etoolbox/etoolbox.sty Package: etoolbox 2018/02/11 v2.5e e-TeX tools for LaTeX (JAW) \etb@tempcnta=\count337 ) ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemDeprecated with sig. 'mm' on line 53. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemMacroset with sig. 'smmm' on line 151. ................................................. (/usr/share/texlive/texmf-dist/tex/latex/koma-script/scrlfile.sty Package: scrlfile 2017/09/07 v3.24 KOMA-Script package (loading files) ) ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ChemCleverefSupport with sig. 'mmomo' on line 356. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ChemFancyrefSupport with sig. 'mmo' on line 356. ................................................. (/usr/share/texlive/texmf-dist/tex/latex/tools/bm.sty Package: bm 2017/01/16 v1.2c Bold Symbol Support (DPC/FMi) \symboldoperators=\mathgroup8 \symboldletters=\mathgroup9 \symboldotherletters=\mathgroup10 LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 141. LaTeX Info: Redefining \bm on input line 207. ) ................................................. . LaTeX info: "xparse/define-command" . . Defining command \chemsetup with sig. 'om' on line 428. ................................................. (chemmacros) Loading module `errorcheck'... 
(/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.errorchec k.code.tex File: chemmacros.module.errorcheck.code.tex 2017/08/28 v5.8b chemmacros module `errorcheck' 2016/10/05 error checking for unloaded modules )) (chemmacros) Loading module `lang'... (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.lang.code .tex File: chemmacros.module.lang.code.tex 2017/08/28 v5.8b chemmacros module `lang' 2016/05/31 language settings for chemmacros (/usr/share/texlive/texmf-dist/tex/latex/translations/translations.sty Package: translations 2017/08/31 v1.7a internationalization of LaTeX2e packages (CN) ) ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ChemTranslate with sig. 'm' on line 68. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemTranslations with sig. 'mm' on line 140. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemTranslation with sig. 'mmm' on line 144. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ForAllChemTranslationsDo with sig. '+m' on line 162. ................................................. ) (chemmacros) Loading module `greek'... (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.greek.cod e.tex File: chemmacros.module.greek.code.tex 2017/08/28 v5.8b chemmacros module `gree k' 2015/06/09 upright greek symbols (/usr/share/texlive/texmf-dist/tex/latex/chemgreek/chemgreek.sty Package: chemgreek 2016/12/20 v1.1 interfaceforuprightgreeklettersforuseinchemi stry (CN) \l__chemgreek_tmpa_int=\count338 \g__chemgreek_tmpa_int=\count339 ................................................. . 
LaTeX info: "xparse/define-command" . . Defining command \newchemgreekmapping with sig. 'O{}mm' on line 336. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \renewchemgreekmapping with sig. 'O{}mm' on line 339. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \declarechemgreekmapping with sig. 'O{}mm' on line 342. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \newchemgreekmappingalias with sig. 'mm' on line 347. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \renewchemgreekmappingalias with sig. 'mm' on line 350. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \declarechemgreekmappingalias with sig. 'mm' on line 353. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \changechemgreeksymbol with sig. 'mmmm' on line 383. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \chemgreekmappingsymbol with sig. 'mm' on line 477. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \activatechemgreekmapping with sig. 'sm' on line 486. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . 
Defining command \selectchemgreekmapping with sig. 'm' on line 491. ................................................. )) (chemmacros) Loading module `chemformula'... (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.chemformu la.code.tex File: chemmacros.module.chemformula.code.tex 2017/08/28 v5.8b chemmacros module `chemformula' 2016/05/03 integration of chemical formulas (chemmacros) Loading module `charges'... (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.charges.c ode.tex File: chemmacros.module.charges.code.tex 2017/08/28 v5.8b chemmacros module `ch arges' 2015/07/30 charges ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemCharge with sig. 'mm' on line 122. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemCharge with sig. 'mm' on line 122. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemCharge with sig. 'mm' on line 122. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemCharge with sig. 'mm' on line 122. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemPartialCharge with sig. 'mm' on line 125. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemPartialCharge with sig. 'mm' on line 125. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . 
Defining command \DeclareChemPartialCharge with sig. 'mm' on line 125. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemPartialCharge with sig. 'mm' on line 125. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \mch with sig. 'o' on line 146. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \pch with sig. 'o' on line 147. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \fmch with sig. 'o' on line 148. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \fpch with sig. 'o' on line 149. ................................................. )) (chemmacros) Loading module `acid-base'... (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.acid-base .code.tex File: chemmacros.module.acid-base.code.tex 2017/08/28 v5.8b chemmacros module ` acid-base' 2016/05/31 acid/base ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemEqConstant with sig. 'mmm' on line 87. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemEqConstant with sig. 'mmm' on line 87. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemEqConstant with sig. 'mmm' on line 87. ................................................. 
................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemEqConstant with sig. 'mmm' on line 87. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \p with sig. 'm' on line 119. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \pH with sig. '' on line 120. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \pOH with sig. '' on line 121. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \pKa with sig. 'o' on line 130. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \pKb with sig. 'o' on line 139. ................................................. ) (chemmacros) Loading module `symbols'... (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.symbols.c ode.tex File: chemmacros.module.symbols.code.tex 2017/08/28 v5.8b chemmacros module `sy mbols' 2015/06/09 symbols ................................................. . LaTeX info: "xparse/define-command" . . Defining command \standardstate with sig. '' on line 67. ................................................. ) (chemmacros) Loading module `particles'... (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.particles .code.tex File: chemmacros.module.particles.code.tex 2017/08/28 v5.8b chemmacros module ` particles' 2016/04/02 particles ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemParticle with sig. 'mm' on line 45. 
................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemParticle with sig. 'mm' on line 45. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemParticle with sig. 'mm' on line 45. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemParticle with sig. 'mm' on line 45. ................................................. \l__chemmacros_nucleophile_dim=\dimen308 ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemNucleophile with sig. 'mm' on line 111. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemNucleophile with sig. 'mm' on line 111. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemNucleophile with sig. 'mm' on line 111. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemNucleophile with sig. 'mm' on line 111. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \Nuc with sig. 'o' on line 130. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ba with sig. 'o' on line 131. ................................................. ) (chemmacros) Loading module `phases'... 
(/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.phases.co de.tex File: chemmacros.module.phases.code.tex 2017/08/28 v5.8b chemmacros module `pha ses' 2016/05/31 phase descriptors \l__chemmacros_phases_space_dim=\dimen309 ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemPhase with sig. 'mm' on line 45. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemPhase with sig. 'mm' on line 45. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemPhase with sig. 'mm' on line 45. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemPhase with sig. 'mm' on line 45. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \phase with sig. 'm' on line 93. ................................................. ................................................. . LaTeX info: "xparse/redefine-command" . . Redefining command \sld with sig. 'o' on line 95. ................................................. ................................................. . LaTeX info: "xparse/redefine-command" . . Redefining command \lqd with sig. 'o' on line 96. ................................................. ................................................. . LaTeX info: "xparse/redefine-command" . . Redefining command \gas with sig. 'o' on line 97. ................................................. ................................................. . LaTeX info: "xparse/redefine-command" . . Redefining command \aq with sig. 'o' on line 98. 
................................................. ) (chemmacros) Loading module `nomenclature'... (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.nomenclat ure.code.tex File: chemmacros.module.nomenclature.code.tex 2017/08/28 v5.8b chemmacros modul e `nomenclature' 2017/06/11 chemical names (chemmacros) Loading module `tikz'... (/usr/share/texlive/texmf-dist/tex/latex/chemmacros/chemmacros.module.tikz.code .tex File: chemmacros.module.tikz.code.tex 2017/08/28 v5.8b chemmacros module `tikz' 2015/10/26 upright greek symbols (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik zlibrarycalc.code.tex File: tikzlibrarycalc.code.tex 2013/07/15 v3.0.1a (rcs-revision 1.9) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik zlibrarydecorations.pathmorphing.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik zlibrarydecorations.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduledecorations.cod e.tex \pgfdecoratedcompleteddistance=\dimen310 \pgfdecoratedremainingdistance=\dimen311 \pgfdecoratedinputsegmentcompleteddistance=\dimen312 \pgfdecoratedinputsegmentremainingdistance=\dimen313 \pgf@decorate@distancetomove=\dimen314 \pgf@decorate@repeatstate=\count340 \pgfdecorationsegmentamplitude=\dimen315 \pgfdecorationsegmentlength=\dimen316 ) \tikz@lib@dec@box=\box73 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/libraries/decorations/pgflibrary decorations.pathmorphing.code.tex)) \l__chemmacros_el_length_dim=\dimen317 ) ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemIUPAC with sig. 'mm' on line 209. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemIUPAC with sig. 'mm' on line 212. ................................................. 
................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemIUPAC with sig. 'mm' on line 215. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemIUPAC with sig. 'mm' on line 218. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \LetChemIUPAC with sig. 'mm' on line 221. ................................................. \l__chemmacros_cip_kern_dim=\dimen318 ................................................. . LaTeX info: "xparse/define-command" . . Defining command \Sconf with sig. 'O{S}' on line 349. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \Rconf with sig. 'O{R}' on line 350. ................................................. \l__chemmacros_iupac_hyphen_pre_dim=\dimen319 \l__chemmacros_iupac_hyphen_post_dim=\dimen320 \l__chemmacros_iupac_break_dim=\dimen321 \l__chemmacros_iupac_break_skip=\skip102 ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemIUPACShorthand with sig. 'mm' on line 604. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemIUPACShorthand with sig. 'mm' on line 611. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemIUPACShorthand with sig. 'mm' on line 617. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemIUPACShorthand with sig. 
'mm' on line 624. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RemoveChemIUPACShorthand with sig. 'm' on line 627. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \iupac with sig. 'O{}m' on line 673. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemLatin with sig. 'mm' on line 755. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemLatin with sig. 'mm' on line 755. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemLatin with sig. 'mm' on line 755. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemLatin with sig. 'mm' on line 755. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \latin with sig. 'O{}m' on line 826. ................................................. )))) ................................................. . chemmacros info: "default-formula-method" . . You haven't chosen a formula method so I'm assuming the default method . `chemformula'. ................................................. 
(/usr/share/texlive/texmf-dist/tex/latex/chemformula/chemformula.sty (/usr/share/texlive/texmf-dist/tex/latex/l3packages/xfrac/xfrac.sty (/usr/share/texlive/texmf-dist/tex/latex/l3packages/xtemplate/xtemplate.sty Package: xtemplate 2018/02/21 L3 Experimental prototype document functions \l__xtemplate_tmp_dim=\dimen322 \l__xtemplate_tmp_int=\count341 \l__xtemplate_tmp_muskip=\muskip18 \l__xtemplate_tmp_skip=\skip103 ) Package: xfrac 2018/02/21 L3 Experimental split-level fractions \l__xfrac_slash_box=\box74 \l__xfrac_tmp_box=\box75 \l__xfrac_denominator_bot_sep_dim=\dimen323 \l__xfrac_numerator_bot_sep_dim=\dimen324 \l__xfrac_numerator_top_sep_dim=\dimen325 \l__xfrac_slash_left_sep_dim=\dimen326 \l__xfrac_slash_right_sep_dim=\dimen327 \l__xfrac_slash_left_muskip=\muskip19 \l__xfrac_slash_right_muskip=\muskip20 ................................................. . xtemplate info: "declare-object-type" . . Declaring object type 'xfrac' taking 3 argument(s) on line 80. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \sfrac with sig. 'omom' on line 420. ................................................. ) (/usr/share/texlive/texmf-dist/tex/latex/units/nicefrac.sty Package: nicefrac 1998/08/04 v0.9b Nice fractions \L@UnitsRaiseDisplaystyle=\skip104 \L@UnitsRaiseTextstyle=\skip105 \L@UnitsRaiseScriptstyle=\skip106 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/libraries/pgflibraryarrows.meta. 
code.tex File: pgflibraryarrows.meta.code.tex 2015/05/13 v3.0.1a (rcs-revision 1.13) \pgfarrowinset=\dimen328 \pgfarrowlength=\dimen329 \pgfarrowwidth=\dimen330 \pgfarrowlinewidth=\dimen331 ) Package: chemformula 2017/03/23 v4.15e typeset chemical compounds and reactions (CN) \l__chemformula_tmpa_dim=\dimen332 \l__chemformula_tmpb_dim=\dimen333 \l__chemformula_tmpc_dim=\dimen334 \l__chemformula_tmpa_int=\count342 \l__chemformula_tmpb_int=\count343 \l__chemformula_tmpc_int=\count344 \l__chemformula_tmpa_box=\box76 \l__chemformula_tmpb_box=\box77 \l__chemformula_arrow_length_dim=\dimen335 \l__chemformula_arrow_label_height_dim=\dimen336 \l__chemformula_arrow_label_offset_dim=\dimen337 \l__chemformula_arrow_minimum_length_dim=\dimen338 \l__chemformula_arrow_shortage_dim=\dimen339 \l__chemformula_arrow_offset_dim=\dimen340 \l__chemformula_arrow_yshift_dim=\dimen341 \l__chemformula_radical_radius_dim=\dimen342 \l__chemformula_radical_hshift_dim=\dimen343 \l__chemformula_radical_vshift_dim=\dimen344 \l__chemformula_radical_space_dim=\dimen345 \l__chemformula_arrow_head_dim=\dimen346 \l__chemformula_name_dim=\dimen347 \l__chemformula_adduct_space_dim=\dimen348 \l__chemformula_charge_shift_dim=\dimen349 \l__chemformula_subscript_shift_dim=\dimen350 \l__chemformula_superscript_shift_dim=\dimen351 \l__chemformula_subscript_dim=\dimen352 \l__chemformula_superscript_dim=\dimen353 \l__chemformula_bond_dim=\dimen354 \l__chemformula_bond_space_dim=\dimen355 \l__chemformula_elspec_pair_distance_dim=\dimen356 \l__chemformula_elspec_pair_line_length_dim=\dimen357 \l__chemformula_elspec_pair_width_dim=\dimen358 \l__chemformula_kroegervink_positive_radius_dim=\dimen359 \l__chemformula_kroegervink_positive_hshift_dim=\dimen360 \l__chemformula_kroegervink_positive_vshift_dim=\dimen361 \l__chemformula_kroegervink_positive_space_dim=\dimen362 \l__chemformula_stoich_space_skip=\skip107 \l__chemformula_math_space_skip=\skip108 \l__chemformula_count_tokens_int=\count345 
\g__chemformula_lewis_int=\count346 \l__chemformula_arrow_arg_i_box=\box78 \l__chemformula_arrow_arg_ii_box=\box79 \l__chemformula_superscript_box=\box80 \l__chemformula_subscript_box=\box81 ................................................. . LaTeX info: "xparse/define-command" . . Defining command \charrow with sig. 'mO{}O{}' on line 823. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemArrow with sig. 'mm' on line 896. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemArrow with sig. 'mm' on line 904. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemArrow with sig. 'mm' on line 911. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemArrow with sig. 'mm' on line 921. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ShowChemArrow with sig. 'm' on line 931. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ch with sig. 'O{}m' on line 1176. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \chcpd with sig. 'O{}m' on line 1198. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \chname with sig. 'R(){}R(){}' on line 1276. 
................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemCompoundProperty with sig. 'mm' on line 1361. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemCompoundProperty with sig. 'mm' on line 1364. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemCompoundProperty with sig. 'mm' on line 1367. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemCompoundProperty with sig. 'mm' on line 1370. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RemoveChemCompoundProperty with sig. 'm' on line 1373. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemBond with sig. 'mm' on line 1571. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemBond with sig. 'mm' on line 1574. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemBond with sig. 'mm' on line 1577. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemBond with sig. 'mm' on line 1580. ................................................. ................................................. . 
LaTeX info: "xparse/define-command" . . Defining command \NewChemBondAlias with sig. 'mm' on line 1583. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemBondAlias with sig. 'mm' on line 1586. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ShowChemBond with sig. 'm' on line 1589. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \bond with sig. 'm' on line 1592. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \chstoich with sig. 'm' on line 2191. ................................................. \l__chemformula_additions_symbol_space_skip=\skip109 ................................................. . LaTeX info: "xparse/define-command" . . Defining command \NewChemAdditionSymbol with sig. 'mmm' on line 2697. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemAdditionSymbol with sig. 'mmm' on line 2706. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemAdditionSymbol with sig. 'mmm' on line 2715. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemAdditionSymbol with sig. 'mmm' on line 2718. ................................................. \l__chemformula_plus_space_skip=\skip110 \l__chemformula_minus_space_skip=\skip111 ................................................. 
. LaTeX info: "xparse/define-command" . . Defining command \NewChemSymbol with sig. 'mm' on line 2763. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \ProvideChemSymbol with sig. 'mm' on line 2769. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \RenewChemSymbol with sig. 'mm' on line 2776. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \DeclareChemSymbol with sig. 'mm' on line 2779. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \chlewis with sig. 'O{}mm' on line 3334. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \setchemformula with sig. 'm' on line 3339. ................................................. ) (./my_thesis.aux (./head/dedication.aux) (./head/acknowledgements.aux) (./head/abstracts.aux) (./main/ch_introduction.aux) (./main/ch_lab_resources.aux) (./main/ch_encode_peaks.aux) (./main/ch_spark.aux) (./main/ch_smile-seq.aux) (./main/ch_pwmscan.aux) (./main/ch_atac-seq.aux LaTeX Warning: Label `encode_peaks_algo_ndr_extend' multiply defined. ) (./main/ch_discussion.aux) (./tail/appendix.aux) (./tail/biblio.aux) (./tail/cv.aux)) \openout1 = `my_thesis.aux'. LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. 
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for FML/futm/m/it on input line 18. LaTeX Font Info: Try loading font information for FML+futm on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutm.fd File: fmlfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futm. ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for FMS/futm/m/n on input line 18. LaTeX Font Info: Try loading font information for FMS+futm on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmsfutm.fd File: fmsfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FMS/futm. ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for FMX/futm/m/n on input line 18. LaTeX Font Info: Try loading font information for FMX+futm on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmxfutm.fd File: fmxfutm.fd futm-extension ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 18. LaTeX Font Info: Try loading font information for TS1+cmr on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd File: ts1cmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Try loading font information for T1+futs on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/t1futs.fd File: t1futs.fd 2004/03/02 Fontinst v1.926 font definitions for T1/futs. ) LaTeX Info: Redefining \degres on input line 18. 
LaTeX Info: Redefining \dots on input line 18. LaTeX Info: Redefining \up on input line 18. (/usr/share/texlive/texmf-dist/tex/context/base/mkii/supp-pdf.mkii [Loading MPS to PDF converter (version 2006.09.02).] \scratchcounter=\count347 \scratchdimen=\dimen363 \scratchbox=\box82 \nofMPsegments=\count348 \nofMParguments=\count349 \everyMPshowfont=\toks44 \MPscratchCnt=\count350 \MPscratchDim=\dimen364 \MPnumerator=\count351 \makeMPintoPDFobject=\count352 \everyMPtoPDFconversion=\toks45 ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty Package: epstopdf-base 2016/05/15 v2.6 Base part for package epstopdf (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/grfext.sty Package: grfext 2016/05/16 v1.2 Manage graphics extensions (HO) ) Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4 38. Package grfext Info: Graphics extension search list: (grfext) [.pdf,.png,.jpg,.mps,.jpeg,.jbig2,.jb2,.PDF,.PNG,.JPG,.JPE G,.JBIG2,.JB2,.eps] (grfext) \AppendGraphicsExtensions on input line 456. (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv e )) Package caption Info: Begin \AtBeginDocument code. Package caption Info: subfig package v1.3 is loaded. Package caption Info: float package is loaded. Package caption Info: hyperref package is loaded. Package caption Info: listings package is loaded. Package caption Info: End \AtBeginDocument code. LaTeX Info: Redefining \microtypecontext on input line 18. Package microtype Info: Generating PDF output. Package microtype Info: Character protrusion enabled (level 2). Package microtype Info: Using default protrusion set `alltext'. Package microtype Info: Automatic font expansion enabled (level 2), (microtype) stretch: 20, shrink: 20, step: 1, non-selected. Package microtype Info: Using default expansion set `basictext'. Package microtype Info: No adjustment of tracking. 
Package microtype Info: No adjustment of interword spacing. Package microtype Info: No adjustment of character kerning. Package microtype Info: Loading generic protrusion settings for font family (microtype) `futs' (encoding: T1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. \c@lstlisting=\count353 \AtBeginShipoutBox=\box83 Package hyperref Info: Link coloring ON on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/hyperref/nameref.sty Package: nameref 2016/05/21 v2.44 Cross-referencing by name of section (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/gettitlestring.sty Package: gettitlestring 2016/05/16 v1.5 Cleanup title references (HO) ) \c@section@level=\count354 ) LaTeX Info: Redefining \ref on input line 18. LaTeX Info: Redefining \pageref on input line 18. LaTeX Info: Redefining \nameref on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/pdflscape.sty Package: pdflscape 2016/05/14 v0.11 Display of landscape pages in PDF (HO) (/usr/share/texlive/texmf-dist/tex/latex/graphics/lscape.sty Package: lscape 2000/10/22 v3.01 Landscape Pages (DPC) ) Package pdflscape Info: Auto-detected driver: pdftex on input line 81. ) ABD: EveryShipout initializing macros (/usr/share/texlive/texmf-dist/tex/latex/translations/translations-basic-dictio nary-english.trsl File: translations-basic-dictionary-english.trsl (english translation file `tra nslations-basic-dictionary') ) Package translations Info: loading dictionary `translations-basic-dictionary' f or `english'. on input line 18. ................................................. . chemgreek info: "mapping-activated" . . Activating mapping `fourier' on line 18. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \insitu with sig. 'O{}' on line 18. ................................................. 
................................................. . LaTeX info: "xparse/define-command" . . Defining command \abinitio with sig. 'O{}' on line 18. ................................................. ................................................. . LaTeX info: "xparse/define-command" . . Defining command \invacuo with sig. 'O{}' on line 18. ................................................. (./head/titlepage.tex LaTeX Font Info: Try loading font information for T1+lmss on input line 5. (/usr/share/texmf/tex/latex/lm/t1lmss.fd File: t1lmss.fd 2009/10/30 v1.6 Font defs for Latin Modern ) Package microtype Info: Loading generic protrusion settings for font family (microtype) `lmss' (encoding: T1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. LaTeX Font Info: Try loading font information for FML+futmi on input line 14 . (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutmi.fd File: fmlfutmi.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futmi. ) LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 10.07397pt on input line 14. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 7.63599pt on input line 14. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 5.51999pt on input line 14. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 10.07397pt on input line 14. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 7.63599pt on input line 14. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 5.51999pt on input line 14. LaTeX Font Info: Try loading font information for U+msa on input line 14. (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsa.fd File: umsa.fd 2013/01/14 v3.01 AMS symbols A ) (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msa.cfg File: mt-msa.cfg 2006/02/04 v1.1 microtype config. 
file: AMS symbols (a) (RS) ) LaTeX Font Info: Try loading font information for U+msb on input line 14. (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsb.fd File: umsb.fd 2013/01/14 v3.01 AMS symbols B ) (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msb.cfg File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS) ) File: images/epfl.pdf Graphic file (type pdf) Package pdftex.def Info: images/epfl.pdf used on input line 15. (pdftex.def) Requested size: 113.81102pt x 49.4394pt. Overfull \hbox (23.99998pt too wide) in paragraph at lines 14--42 [][] [] [1 {/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} <./images/epfl.pdf>]) \openout2 = `head/dedication.aux'. (./head/dedication.tex [2 ]) [3] \openout2 = `head/acknowledgements.aux'. (./head/acknowledgements.tex [0 ] LaTeX Font Info: Font shape `T1/futs/bx/n' in size <10.95> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 1. LaTeX Font Info: Font shape `T1/futs/bx/n' in size <24.88> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 1. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 22.88956pt on input line 1. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 15.89755pt on input line 1. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 11.03998pt on input line 1. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 22.88956pt on input line 1. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 15.89755pt on input line 1. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 11.03998pt on input line 1. )pdfTeX warning (ext4): destination with the same identifier (name{page.i}) has been already used, duplicate ignored \relax l.25 \include{head/acknowledgements} [1] \openout2 = `head/abstracts.aux'. 
(./head/abstracts.texpdfTeX warning (ext4): destination with the same identifi er (name{page.ii}) has been already used, duplicate ignored \relax l.6 \cleardoublepage [2 ] Underfull \vbox (badness 10000) has occurred while \output is active [] pdfTeX warning (ext4): destination with the same identifier (name{page.iii}) ha s been already used, duplicate ignored \relax l.27 [3 ] [4] Underfull \vbox (badness 10000) has occurred while \output is active [] [5 ]) [6] (./my_thesis.toc [7 ] [8] Overfull \hbox (1.22647pt too wide) detected at line 117 \T1/futs/m/n/10.95 7.10.10 [] Overfull \hbox (1.22647pt too wide) detected at line 118 \T1/futs/m/n/10.95 7.10.11 [] [9]) \tf@toc=\write4 \openout4 = `my_thesis.toc'. [10] \openout2 = `main/ch_introduction.aux'. (./main/ch_introduction.tex Chapter 1. LaTeX Font Info: Font shape `T1/futs/bx/n' in size <14.4> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 13. LaTeX Font Info: Font shape `T1/futs/bx/n' in size <12> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 18. File: images/ch_introduction/chromatin.png Graphic file (type png) Package pdftex.def Info: images/ch_introduction/chromatin.png used on input li ne 23. (pdftex.def) Requested size: 314.13602pt x 427.8417pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [1 ] Underfull \vbox (badness 10000) has occurred while \output is active [] [2] [3 <./images/ch_introduction/chromatin.png>] File: images/ch_introduction/nucleosome_positioning.png Graphic file (type png) Package pdftex.def Info: images/ch_introduction/nucleosome_positioning.png use d on input line 57. (pdftex.def) Requested size: 290.89583pt x 230.46324pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] [4] [5 <./images/ch_introduction/nucleosome_positioning.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [6] File: images/ch_introduction/TF_associations.png Graphic file (type png) Package pdftex.def Info: images/ch_introduction/TF_associations.png used on in put line 103. (pdftex.def) Requested size: 267.18777pt x 152.59023pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [7] Underfull \vbox (badness 10000) has occurred while \output is active [] [8 <./images/ch_introduction/TF_associations.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [9] Underfull \vbox (badness 10000) has occurred while \output is active [] [10] Underfull \vbox (badness 10000) has occurred while \output is active [] [11] Underfull \vbox (badness 10000) has occurred while \output is active [] [12] Underfull \vbox (badness 10000) has occurred while \output is active [] [13] Underfull \vbox (badness 10000) has occurred while \output is active [] [14] File: images/ch_introduction/dgf.png Graphic file (type png) Package pdftex.def Info: images/ch_introduction/dgf.png used on input line 238 . (pdftex.def) Requested size: 341.60641pt x 318.3888pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [15] [16 <./images/ch_introduction/dgf.png>] LaTeX Font Info: Try loading font information for TS1+futs on input line 273 . (/usr/share/texlive/texmf-dist/tex/latex/fourier/ts1futs.fd File: ts1futs.fd 2004/03/26 Fontinst v1.926 font definitions for TS1/futs. ) Package microtype Info: Loading generic protrusion settings for font family (microtype) `futs' (encoding: TS1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. 
Underfull \vbox (badness 10000) has occurred while \output is active [] [17] Underfull \vbox (badness 10000) has occurred while \output is active [] [18] File: images/ch_introduction/figure_pwm.png Graphic file (type png) Package pdftex.def Info: images/ch_introduction/figure_pwm.png used on input l ine 341. (pdftex.def) Requested size: 290.52025pt x 182.75955pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [19] Underfull \vbox (badness 10000) has occurred while \output is active [] [20 <./images/ch_introduction/figure_pwm.png>] File: images/ch_introduction/shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_introduction/shift_flip.png used on input l ine 394. (pdftex.def) Requested size: 425.19179pt x 78.7749pt. Overfull \hbox (7.50533pt too wide) in paragraph at lines 394--395 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [21 <./images/ch_introduction/shift_flip.png>]) Underfull \vbox (badness 10000) has occurred while \output is active [] [22] [23] \openout2 = `main/ch_lab_resources.aux'. (./main/ch_lab_resources.tex [24 ] Chapter 2. Underfull \vbox (badness 10000) has occurred while \output is active [] [25 ] File: images/ch_lab_resources/mga_figure1.jpeg Graphic file (type jpg) Package pdftex.def Info: images/ch_lab_resources/mga_figure1.jpeg used on inpu t line 25. (pdftex.def) Requested size: 400.23181pt x 134.13329pt. [26 <./images/ch_lab_resources/mga_figure1.jpeg>] [27] File: images/ch_lab_resources/epd_figure1.jpeg Graphic file (type jpg) Package pdftex.def Info: images/ch_lab_resources/epd_figure1.jpeg used on inpu t line 61. (pdftex.def) Requested size: 215.12772pt x 174.80144pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [28 <./images/ch_lab_resources/epd_figure1.jpeg>] File: images/ch_lab_resources/epd_motifs.png Graphic file (type png) Package pdftex.def Info: images/ch_lab_resources/epd_motifs.png used on input line 113. 
(pdftex.def) Requested size: 346.89868pt x 173.44933pt. [29] Underfull \vbox (badness 2600) has occurred while \output is active [] [30 <./images/ch_lab_resources/epd_motifs.png (PNG copy)>]) [31] \openout2 = `main/ch_encode_peaks.aux'. (./main/ch_encode_peaks.tex [32 ] Chapter 3. File: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png used on input line 23. (pdftex.def) Requested size: 520.34802pt x 173.44933pt. Overfull \hbox (102.66156pt too wide) in paragraph at lines 23--24 [] [] File: images/ch_encode_peaks/peaklist_proportions_GM12878.png Graphic file (typ e png) Package pdftex.def Info: images/ch_encode_peaks/peaklist_proportions_GM12878.pn g used on input line 32. (pdftex.def) Requested size: 520.34802pt x 173.44933pt. Overfull \hbox (102.66156pt too wide) in paragraph at lines 32--33 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [33 ] [34 <./images/ch_encode_peaks/peaklist_peaknumber_GM12878.png (PNG copy)> <./im ages/ch_encode_peaks/peaklist_proportions_GM12878.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] [35] Underfull \vbox (badness 10000) has occurred while \output is active [] [36] File: images/ch_encode_peaks/MNase_profiles.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/MNase_profiles.png used on inp ut line 103. (pdftex.def) Requested size: 377.15814pt x 259.5691pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [37] [38 <./images/ch_encode_peaks/MNase_profiles.png>] File: images/ch_encode_peaks/colocalization_ctcf.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/colocalization_ctcf.png used o n input line 131. (pdftex.def) Requested size: 403.20538pt x 320.54678pt. 
File: images/ch_encode_peaks/CTCF_ndr_length_rad212.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/CTCF_ndr_length_rad212.png use d on input line 139. (pdftex.def) Requested size: 404.70483pt x 231.2599pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [39] [40 <./images/ch_encode_peaks/colocalization_ctcf.png>] [41 <./images/ch_encode _peaks/CTCF_ndr_length_rad212.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] [42] File: images/ch_encode_peaks/ctcf_motif_association.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ctcf_motif_association.png use d on input line 177. (pdftex.def) Requested size: 433.61232pt x 339.18118pt. Overfull \hbox (15.92586pt too wide) in paragraph at lines 177--178 [] [] LaTeX Warning: Float too large for page by 31.83305pt on input line 233. Underfull \vbox (badness 10000) has occurred while \output is active [] [43] [44 <./images/ch_encode_peaks/ctcf_motif_association.png>] [45] Underfull \vbox (badness 10000) has occurred while \output is active [] [46] File: images/ch_encode_peaks/ebf1_haib_1.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_1.png used on input line 262. (pdftex.def) Requested size: 260.16739pt x 346.88986pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] [47] [48 <./images/ch_encode_peaks/ebf1_haib_1.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] [49] Underfull \vbox (badness 10000) has occurred while \output is active [] [50] Underfull \vbox (badness 10000) has occurred while \output is active [] [51] Underfull \vbox (badness 10000) has occurred while \output is active [] [52] Underfull \vbox (badness 10000) has occurred while \output is active [] [53] Underfull \vbox (badness 10000) has occurred while \output is active [] [54] LaTeX Font Info: Font shape `T1/futs/bx/n' in size <8> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 455. LaTeX Font Info: Try loading font information for T1+lmtt on input line 455. (/usr/share/texmf/tex/latex/lm/t1lmtt.fd File: t1lmtt.fd 2009/10/30 v1.6 Font defs for Latin Modern ) Package microtype Info: Loading generic protrusion settings for font family (microtype) `lmtt' (encoding: T1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. Underfull \vbox (badness 10000) has occurred while \output is active [] [55] Underfull \vbox (badness 10000) has occurred while \output is active [] [56] Overfull \hbox (9.9085pt too wide) in paragraph at lines 478--479 \T1/futs/m/n/10.95 (-20) ences were the cor-rected EBF1 peaks (wgEn-codeAwgTf-b -sHaibGm12878Ebf1sc137065Pcr1xUniPk [] ) Underfull \vbox (badness 10000) has occurred while \output is active [] [57] [58] \openout2 = `main/ch_spark.aux'. (./main/ch_spark.tex Chapter 4. Package hyperref Info: bookmark level for unknown toc defaults to 0 on input li ne 5. -Package natbib Warning: Citation `nielsen_catchprofiles' on page 59 undefined o -n input line 13. 
- - Underfull \vbox (badness 10000) has occurred while \output is active [] [59 ] - -File: images/ch_spark/supplemental_figure1.pdf Graphic file (type pdf) - -Package pdftex.def Info: images/ch_spark/supplemental_figure1.pdf used on inpu + +File: images/ch_spark/supplemental_figure1.png Graphic file (type png) + +Package pdftex.def Info: images/ch_spark/supplemental_figure1.png used on inpu t line 42. (pdftex.def) Requested size: 404.70483pt x 202.3524pt. - -File: images/ch_spark/supplemental_figure2.pdf Graphic file (type pdf) - -Package pdftex.def Info: images/ch_spark/supplemental_figure2.pdf used on inpu -t line 49. + +File: images/ch_spark/supplemental_figure2.png Graphic file (type png) + +Package pdftex.def Info: images/ch_spark/supplemental_figure2.png used on inpu +t line 50. (pdftex.def) Requested size: 462.5198pt x 202.3524pt. - -File: images/ch_spark/supplemental_figure4.pdf Graphic file (type pdf) - -Package pdftex.def Info: images/ch_spark/supplemental_figure4.pdf used on inpu -t line 57. + +File: images/ch_spark/supplemental_figure4.png Graphic file (type png) + +Package pdftex.def Info: images/ch_spark/supplemental_figure4.png used on inpu +t line 58. (pdftex.def) Requested size: 289.07487pt x 173.44492pt. - -File: images/ch_spark/supplemental_figure5.pdf Graphic file (type pdf) - -Package pdftex.def Info: images/ch_spark/supplemental_figure5.pdf used on inpu -t line 65. + +File: images/ch_spark/supplemental_figure5.png Graphic file (type png) + +Package pdftex.def Info: images/ch_spark/supplemental_figure5.png used on inpu +t line 66. (pdftex.def) Requested size: 289.07487pt x 173.44492pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] [60] -Overfull \hbox (24.71843pt too wide) in paragraph at lines 74--75 +[61 <./images/ch_spark/supplemental_figure1.png (PNG copy)>] [62 <./images/ch_s +park/supplemental_figure2.png (PNG copy)>] [63 <./images/ch_spark/supplemental_ +figure4.png> <./images/ch_spark/supplemental_figure5.png (PNG copy)>] +Overfull \hbox (24.71843pt too wide) in paragraph at lines 75--76 [][]\T1/futs/m/n/10.95 (-20) 2006[][]) was used. Calls to kc-caFam-ily(dist=dis tEuclidean, cent=centMean) or kc-caFam-ily(dist=distCor, [] -Overfull \hbox (3.89964pt too wide) in paragraph at lines 77--78 +Overfull \hbox (3.89964pt too wide) in paragraph at lines 78--79 \T1/futs/m/n/10.95 (-20) The im-ple-men-ta-tion was done in R pro-gram-ming lan -guage. The "em_shape", "em_shape_shift" [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [61 <./images/ch_spark/supplemental_figure1.pdf>] [62 <./images/ch_spark/suppl -emental_figure2.pdf>] [63 <./images/ch_spark/supplemental_figure4.pdf> <./image -s/ch_spark/supplemental_figure5.pdf>] -Underfull \hbox (badness 10000) in paragraph at lines 89--90 + [64] +Underfull \hbox (badness 10000) in paragraph at lines 90--91 [] - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [64] - -File: images/ch_spark/supplemental_figure8.pdf Graphic file (type pdf) - -Package pdftex.def Info: images/ch_spark/supplemental_figure8.pdf used on inpu -t line 98. + +File: images/ch_spark/figure1.png Graphic file (type png) + +Package pdftex.def Info: images/ch_spark/figure1.png used on input line 99. (pdftex.def) Requested size: 404.70483pt x 231.2599pt. - -File: images/ch_spark/figure1.pdf Graphic file (type pdf) - -Package pdftex.def Info: images/ch_spark/figure1.pdf used on input line 105. 
+ +File: images/ch_spark/supplemental_figure8.png Graphic file (type png) + +Package pdftex.def Info: images/ch_spark/supplemental_figure8.png used on inpu +t line 107. (pdftex.def) Requested size: 404.70483pt x 231.2599pt. -) -[65] [66 <./images/ch_spark/supplemental_figure8.pdf>] [67 <./images/ch_spark/f -igure1.pdf>] -\openout2 = `main/ch_smile-seq.aux'. - - (./main/ch_smile-seq.tex [68 +Underfull \vbox (badness 10000) has occurred while \output is active [] + [65] +[66 <./images/ch_spark/figure1.png (PNG copy)>] [67 <./images/ch_spark/suppleme +ntal_figure8.png (PNG copy)>]) [68] +\openout2 = `main/ch_smile-seq.aux'. -] + (./main/ch_smile-seq.tex Chapter 5. - + File: images/ch_smile-seq/figure1.jpg Graphic file (type jpg) Package pdftex.def Info: images/ch_smile-seq/figure1.jpg used on input line 23 . (pdftex.def) Requested size: 232.36755pt x 301.62613pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [69 + + + ] [70 <./images/ch_smile-seq/figure1.jpg>] - + File: images/ch_smile-seq/figure_hmm.png Graphic file (type png) Package pdftex.def Info: images/ch_smile-seq/figure_hmm.png used on input line 41. (pdftex.def) Requested size: 416.22516pt x 215.09944pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [71 <./images/ch_smile-seq/figure_hmm.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [72] - + File: images/ch_smile-seq/figure2b_3a.png Graphic file (type png) Package pdftex.def Info: images/ch_smile-seq/figure2b_3a.png used on input lin e 118. (pdftex.def) Requested size: 398.92334pt x 166.8203pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [73] [74 <./images/ch_smile-seq/figure2b_3a.png>]) [75] \openout2 = `main/ch_pwmscan.aux'. (./main/ch_pwmscan.tex [76 ] Chapter 6. 
Underfull \vbox (badness 10000) has occurred while \output is active [] [77 ] Underfull \vbox (badness 10000) has occurred while \output is active [] [78] - File: images/ch_lab_resources/pwmscan_flowchart.png Graphic file (type png) Package pdftex.def Info: images/ch_lab_resources/pwmscan_flowchart.png used on input line 51. (pdftex.def) Requested size: 279.21945pt x 370.52591pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [79] [80 <./images/ch_lab_resources/pwmscan_flowchart.png>] - File: images/ch_lab_resources/pwmscan_figure_s1.png Graphic file (type png) Package pdftex.def Info: images/ch_lab_resources/pwmscan_figure_s1.png used on input line 86. (pdftex.def) Requested size: 269.60248pt x 153.77177pt. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 8.27998pt on input line 99. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 6.99199pt on input line 99. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 8.27998pt on input line 99. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 6.99199pt on input line 99. Underfull \vbox (badness 2875) has occurred while \output is active [] [81 <./images/ch_lab_resources/pwmscan_figure_s1.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [82]) [83] \openout2 = `main/ch_atac-seq.aux'. (./main/ch_atac-seq.tex [84 ] Chapter 7. Underfull \vbox (badness 10000) has occurred while \output is active [] [85 ] Underfull \vbox (badness 10000) has occurred while \output is active [] [86] - + File: images/ch_atac-seq/em.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/em.png used on input line 79. (pdftex.def) Requested size: 465.4215pt x 348.04185pt. 
Overfull \hbox (47.73503pt too wide) in paragraph at lines 79--81 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [87] [88 <./images/ch_atac-seq/em.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [89] Underfull \vbox (badness 10000) has occurred while \output is active [] [90] Underfull \vbox (badness 10000) has occurred while \output is active [] [91] Underfull \vbox (badness 10000) has occurred while \output is active [] [92] - File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png Graphic fil e (type png) Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas s_2.png used on input line 253. (pdftex.def) Requested size: 455.30783pt x 404.71806pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 253--254 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [93] [94 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png (PNG copy) >] - + File: images/ch_atac-seq/data_classCTCF_8class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/data_classCTCF_8class.png used on input line 280. (pdftex.def) Requested size: 433.62335pt x 216.81166pt. 
Overfull \hbox (15.93689pt too wide) in paragraph at lines 280--281 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [95] [96 <./images/ch_atac-seq/data_classCTCF_8class.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] [97] Underfull \vbox (badness 10000) has occurred while \output is active [] [98] Underfull \vbox (badness 10000) has occurred while \output is active [] [99] Underfull \vbox (badness 10000) has occurred while \output is active [] [100] Underfull \vbox (badness 10000) has occurred while \output is active [] [101] Overfull \vbox (45.20699pt too high) has occurred while \output is active [] [102] [103] [104] Underfull \vbox (badness 10000) has occurred while \output is active [] [105] Underfull \hbox (badness 3343) in paragraph at lines 511--511 \T1/futs/m/n/10.95 (+20) FOSL2, JUNB, JUN::JUNB, FOSL1::JUND, FOS::JUN, [] Overfull \hbox (5.93637pt too wide) in paragraph at lines 503--534 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [106] [107] Underfull \vbox (badness 10000) has occurred while \output is active [] [108]) [109] \openout2 = `main/ch_discussion.aux'. (./main/ch_discussion.tex [110 ] Chapter 8. Underfull \vbox (badness 10000) has occurred while \output is active [] [111 ] Underfull \vbox (badness 10000) has occurred while \output is active [] [112]) [113] [114 ] \openout2 = `tail/appendix.aux'. (./tail/appendix.tex Appendix A. +s_EM_4class_15shift_flip.png, id=2119, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_al lpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUni Pk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input line 21. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] [115 ] Overfull \vbox (192.32838pt too high) has occurred while \output is active [] [116 <./images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_ allpeaks_EM_4class_15shift_flip.png (PNG copy)>] +_allpeaks_EM_4class_15shift_flip.png, id=2128, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_GM 12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1I ggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input lin e 29. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. +aks_EM_4class_15shift_flip.png, id=2129, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM12878_ allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosU niPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input line 37. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [117 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase _GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] +allpeaks_EM_4class_15shift_flip.png, id=2134, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_GM1 2878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIg gmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input line 45. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] [118 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM128 78_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] +12878_allpeaks_EM_4class_15shift_flip.png, id=2139, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_MNa se_GM12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1 a300IggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on inpu t line 53. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [119 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_ GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] - + File: images/ch_encode_peaks/ctcf_ndr.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ctcf_ndr.png used on input lin e 61. (pdftex.def) Requested size: 346.89647pt x 462.52863pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [120 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_ MNase_GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] - File: images/ch_encode_peaks/jund_motif_association.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/jund_motif_association.png use d on input line 69. (pdftex.def) Requested size: 433.61232pt x 339.18118pt. Overfull \hbox (15.92586pt too wide) in paragraph at lines 69--70 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [121 <./images/ch_encode_peaks/ctcf_ndr.png (PNG copy)>] - + File: images/ch_encode_peaks/ebf1_haib_3.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_3.png used on input line 77. (pdftex.def) Requested size: 260.16739pt x 115.62994pt. 
- + File: images/ch_encode_peaks/MA0154_3.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/MA0154_3.png used on input lin e 85. (pdftex.def) Requested size: 361.3491pt x 180.67456pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [122 <./images/ch_encode_peaks/jund_motif_association.png>] - + File: images/ch_encode_peaks/ebf1_haib_2.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_2.png used on input line 93. (pdftex.def) Requested size: 260.16739pt x 346.88986pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [123 <./images/ch_encode_peaks/ebf1_haib_3.png (PNG copy)> <./images/ch_encode _peaks/MA0154_3.png>] [124 <./images/ch_encode_peaks/ebf1_haib_2.png (PNG copy) >] Underfull \vbox (badness 10000) has occurred while \output is active [] [125] Overfull \vbox (122.1795pt too high) has occurred while \output is active [] [126] Underfull \vbox (badness 10000) has occurred while \output is active [] [127] Underfull \vbox (badness 10000) has occurred while \output is active [] [128] Overfull \vbox (48.86317pt too high) has occurred while \output is active [] [129] Underfull \vbox (badness 10000) has occurred while \output is active [] [130] Underfull \vbox (badness 10000) has occurred while \output is active [] [131] Overfull \vbox (102.86353pt too high) has occurred while \output is active [] [132] Underfull \vbox (badness 10000) has occurred while \output is active [] [133] Overfull \vbox (118.59161pt too high) has occurred while \output is active [] [134] Underfull \vbox (badness 10000) has occurred while \output is active [] [135] Overfull \vbox (103.50354pt too high) has occurred while \output is active [] [136] Underfull \vbox (badness 10000) has occurred while \output is active [] [137] - File: images/ch_smile-seq/figure_s4_reproduced.png Graphic file (type png) Package pdftex.def Info: images/ch_smile-seq/figure_s4_reproduced.png used on input line 
526. (pdftex.def) Requested size: 424.06316pt x 235.07848pt. Overfull \hbox (6.3767pt too wide) in paragraph at lines 526--527 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [138] - + File: images/ch_atac-seq/fragment_lengths.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/fragment_lengths.png used on input line 541. (pdftex.def) Requested size: 433.62335pt x 130.087pt. Overfull \hbox (15.93689pt too wide) in paragraph at lines 541--542 [] [] LaTeX Warning: `h' float specifier changed to `ht'. - File: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png used on input line 549. (pdftex.def) Requested size: 346.88986pt x 260.16739pt. LaTeX Warning: `h' float specifier changed to `ht'. [139 <./images/ch_smile-seq/figure_s4_reproduced.png>] - File: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png Graphic file (type png ) Package pdftex.def Info: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png us ed on input line 568. (pdftex.def) Requested size: 390.26102pt x 195.1305pt. LaTeX Warning: `h' float specifier changed to `ht'. Underfull \vbox (badness 10000) has occurred while \output is active [] [140 <./images/ch_atac-seq/fragment_lengths.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] [141 <./images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] [142 <./images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png (PNG copy)>] - File: images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png Graphi c file (type png) Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_bes t_motifs.png used on input line 607. (pdftex.def) Requested size: 202.3524pt x 231.2599pt. LaTeX Warning: `h' float specifier changed to `ht'. 
- File: images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png Graphic fi le (type png) Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_auc _roc.png used on input line 615. (pdftex.def) Requested size: 346.88986pt x 173.44492pt. LaTeX Warning: `h' float specifier changed to `ht'. - + File: images/ch_atac-seq/sp1_motifs_7class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_7class.png used on inpu t line 623. (pdftex.def) Requested size: 455.30783pt x 303.53854pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 623--624 [] [] LaTeX Warning: `h' float specifier changed to `ht'. - + File: images/ch_atac-seq/sp1_motifs_10class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_10class.png used on inp ut line 631. (pdftex.def) Requested size: 455.30783pt x 303.53854pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 631--632 [] [] LaTeX Warning: `h' float specifier changed to `ht'. Underfull \vbox (badness 10000) has occurred while \output is active [] [143] Underfull \vbox (badness 2088) has occurred while \output is active [] [144 <./images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png (PN G copy)>] [145 <./images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png (PNG cop y)>] - File: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png used on input line 653. (pdftex.def) Requested size: 390.26102pt x 173.44933pt. LaTeX Warning: `h' float specifier changed to `ht'. - File: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png used on input line 661. (pdftex.def) Requested size: 390.26102pt x 173.44933pt. LaTeX Warning: `h' float specifier changed to `ht'. 
- File: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png Graphic file (type p ng) Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png used on input line 669. (pdftex.def) Requested size: 455.30783pt x 202.35902pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 669--670 [] [] LaTeX Warning: `h' float specifier changed to `ht'. - File: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png Graphic file (type pn g) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png u sed on input line 677. (pdftex.def) Requested size: 390.26102pt x 173.44933pt. LaTeX Warning: `h' float specifier changed to `ht'. Underfull \vbox (badness 10000) has occurred while \output is active [] [146 <./images/ch_atac-seq/sp1_motifs_7class.png (PNG copy)>] Underfull \vbox (badness 4341) has occurred while \output is active [] [147 <./images/ch_atac-seq/sp1_motifs_10class.png (PNG copy)>] - File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas s.png used on input line 698. (pdftex.def) Requested size: 455.30783pt x 404.71806pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 698--699 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] [148 <./images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png (PNG copy)>] Overfull \vbox (211.22089pt too high) has occurred while \output is active [] [149 <./images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png (PNG copy)> <./im ages/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png (PNG copy)>] - + File: images/ch_atac-seq/data_classPU1_2class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/data_classPU1_2class.png used on i nput line 706. (pdftex.def) Requested size: 433.62335pt x 216.81166pt. 
Overfull \hbox (15.93689pt too wide) in paragraph at lines 706--707 [] [] Overfull \vbox (26.60411pt too high) has occurred while \output is active [] [150 <./images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png (PNG copy)> <./ima ges/ch_atac-seq/data_classPU1_2class.png (PNG copy)>] - + File: images/ch_atac-seq/data_classjun_3class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/data_classjun_3class.png used on i nput line 714. (pdftex.def) Requested size: 433.62335pt x 216.81166pt. Overfull \hbox (15.93689pt too wide) in paragraph at lines 714--715 [] [] ) [151 <./images/ch_atac-seq/sp1_motifs_6class_shift_flip.png (PNG copy)> <./im ages/ch_atac-seq/data_classjun_3class.png (PNG copy)>] [152 ] \openout2 = `tail/biblio.aux'. (./tail/biblio.tex (./my_thesis.bbl [153 ] [154] [155] [156] [157] [158] [159] [160] [161] [162] [163] [164])) [165] \openout2 = `tail/cv.aux'. (./tail/cv.tex [166 ] - + File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf used on input line 6. (pdftex.def) Requested size: 597.5064pt x 845.01631pt. File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf used on input line 6. (pdftex.def) Requested size: 597.5064pt x 845.01631pt. - + File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.5064pt x 845.01631pt. File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. 
File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. [167 <./tail/cv_en.pdf>] - + File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page2 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page2 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page2 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. [168 <./tail/cv_en.pdf>] - + File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page3 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page3 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. File: tail/cv_en.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv_en.pdf , page3 used on input line 6. (pdftex.def) Requested size: 597.53374pt x 845.055pt. [169 <./tail/cv_en.pdf>]) - -Package natbib Warning: There were undefined citations. - Package atveryend Info: Empty hook `BeforeClearDocument' on input line 80. Package atveryend Info: Empty hook `AfterLastShipout' on input line 80. 
-(./my_thesis.aux (./head/dedication.aux) (./head/acknowledgements.aux) -(./head/abstracts.aux) (./main/ch_introduction.aux) -(./main/ch_lab_resources.aux) (./main/ch_encode_peaks.aux) (./main/ch_spark.aux -) (./main/ch_smile-seq.aux) (./main/ch_pwmscan.aux) (./main/ch_atac-seq.aux) -(./main/ch_discussion.aux) (./tail/appendix.aux) (./tail/biblio.aux) -(./tail/cv.aux)) + (./my_thesis.aux +(./head/dedication.aux) (./head/acknowledgements.aux) (./head/abstracts.aux) +(./main/ch_introduction.aux) (./main/ch_lab_resources.aux) +(./main/ch_encode_peaks.aux) (./main/ch_spark.aux) (./main/ch_smile-seq.aux) +(./main/ch_pwmscan.aux) (./main/ch_atac-seq.aux) (./main/ch_discussion.aux) +(./tail/appendix.aux) (./tail/biblio.aux) (./tail/cv.aux)) Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 80. Package atveryend Info: Empty hook `AtEndAfterFileList' on input line 80. LaTeX Warning: There were multiply-defined labels. Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 80. ) Here is how much of TeX's memory you used: - 42063 strings out of 492982 - 820563 string characters out of 6134895 - 1057797 words of memory out of 5000000 - 43568 multiletter control sequences out of 15000+600000 + 42062 strings out of 492982 + 820396 string characters out of 6134895 + 1063875 words of memory out of 5000000 + 43567 multiletter control sequences out of 15000+600000 732775 words of font info for 397 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 73i,24n,99p,10424b,1319s stack positions out of 5000i,500n,10000p,200000b,80000s {/usr/share/texmf/fonts/enc/dvips/lm/lm-ec.enc}{/usr/share/texlive/texmf-dist /fonts/enc/dvips/base/8r.enc} -Output written on my_thesis.pdf (183 pages, 108761454 bytes). +Output written on my_thesis.pdf (183 pages, 81807506 bytes). PDF statistics: - 3405 PDF objects out of 3580 (max. 8388607) - 3013 compressed objects within 31 object streams + 3374 PDF objects out of 3580 (max. 
8388607) + 2984 compressed objects within 30 object streams 905 named destinations out of 1000 (max. 500000) 38683 words of extra memory for PDF output out of 42996 (max. 10000000) diff --git a/my_thesis.pdf b/my_thesis.pdf index 5340b2f..b7b9469 100644 Binary files a/my_thesis.pdf and b/my_thesis.pdf differ diff --git a/my_thesis.synctex.gz b/my_thesis.synctex.gz index e03e105..d4ae83c 100644 Binary files a/my_thesis.synctex.gz and b/my_thesis.synctex.gz differ diff --git a/my_thesis.toc b/my_thesis.toc index 7444e6c..313d57d 100644 --- a/my_thesis.toc +++ b/my_thesis.toc @@ -1,135 +1,135 @@ \babel@toc {english}{} \babel@toc {french}{} \babel@toc {english}{} \contentsline {chapter}{Acknowledgements}{i}{chapter*.1} \contentsline {chapter}{Abstract (English/Fran\IeC {\c c}ais/Deutsch)}{iii}{chapter*.2} \babel@toc {french}{} \babel@toc {english}{} \contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1} \contentsline {chapter}{Introduction}{1}{chapter.1} \contentsline {section}{\numberline {1.1}About chromatin}{1}{section.1.1} \contentsline {subsection}{\numberline {1.1.1}The chromatin structure}{2}{subsection.1.1.1} \contentsline {subsection}{\numberline {1.1.2}The chromatin is dynamic}{2}{subsection.1.1.2} \contentsline {subsection}{\numberline {1.1.3}About nucleosome positioning}{4}{subsection.1.1.3} \contentsline {section}{\numberline {1.2}About transcription factors}{7}{section.1.2} \contentsline {subsection}{\numberline {1.2.1}TF co-binding}{7}{subsection.1.2.1} \contentsline {section}{\numberline {1.3}Gene regulation in a nutshell}{9}{section.1.3} \contentsline {subsection}{\numberline {1.3.1}The chromatin barrier}{9}{subsection.1.3.1} \contentsline {subsection}{\numberline {1.3.2}TFs cooperative binding}{9}{subsection.1.3.2} \contentsline {subsection}{\numberline {1.3.3}Pioneer TFs}{10}{subsection.1.3.3} \contentsline {subsection}{\numberline {1.3.4}Regulatory elements}{10}{subsection.1.3.4} \contentsline {subsection}{\numberline {1.3.5}The genome 
goes 3D}{11}{subsection.1.3.5} \contentsline {section}{\numberline {1.4}Measuring chromatin features}{12}{section.1.4} \contentsline {subsection}{\numberline {1.4.1}Measuring TF binding in vivo}{12}{subsection.1.4.1} \contentsline {subsection}{\numberline {1.4.2}Measuring TF binding in vitro}{13}{subsection.1.4.2} \contentsline {subsection}{\numberline {1.4.3}Measuring nucleosome occupancy}{14}{subsection.1.4.3} \contentsline {subsection}{\numberline {1.4.4}Digital footprinting}{15}{subsection.1.4.4} \contentsline {section}{\numberline {1.5}Modeling sequence specificity}{17}{section.1.5} \contentsline {subsubsection}{The physics approach to PWMs}{17}{section.1.5} \contentsline {subsubsection}{The statistical mechanic approach to PWMs}{18}{equation.1.5.2} \contentsline {subsection}{\numberline {1.5.1}Aligning binding sites}{19}{subsection.1.5.1} \contentsline {subsection}{\numberline {1.5.2}Platitudes}{20}{subsection.1.5.2} \contentsline {subsection}{\numberline {1.5.3}Predicting binding sites}{20}{subsection.1.5.3} \contentsline {section}{\numberline {1.6}Over-represented patterns discovery}{21}{section.1.6} \contentsline {chapter}{\numberline {2}Laboratory resources}{25}{chapter.2} \contentsline {chapter}{Laboratory resources}{25}{chapter.2} \contentsline {section}{\numberline {2.1}Mass Genome Annotation repository}{25}{section.2.1} \contentsline {subsection}{\numberline {2.1.1}MGA content and organization}{26}{subsection.2.1.1} \contentsline {subsection}{\numberline {2.1.2}Conclusions}{27}{subsection.2.1.2} \contentsline {section}{\numberline {2.2}Eukaryotic Promoter Database}{28}{section.2.2} \contentsline {subsection}{\numberline {2.2.1}EPDnew now annotates (some of) your mushrooms and vegetables}{29}{subsection.2.2.1} \contentsline {subsection}{\numberline {2.2.2}Increased mapping precision in human}{30}{subsection.2.2.2} \contentsline {subsection}{\numberline {2.2.3}Integration of EPDnew with other resources}{30}{subsection.2.2.3} \contentsline 
{subsection}{\numberline {2.2.4}Conclusions}{31}{subsection.2.2.4} \contentsline {subsection}{\numberline {2.2.5}Methods}{31}{subsection.2.2.5} \contentsline {subsubsection}{Motif occurrence profiles}{31}{subsection.2.2.5} \contentsline {chapter}{\numberline {3}ENCODE peaks analysis}{33}{chapter.3} \contentsline {chapter}{ENCODE peaks analysis}{33}{chapter.3} \contentsline {section}{\numberline {3.1}Data}{33}{section.3.1} \contentsline {section}{\numberline {3.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{35}{section.3.2} \contentsline {subsection}{\numberline {3.2.1}Data realignment}{36}{subsection.3.2.1} \contentsline {section}{\numberline {3.3}Nucleosome organization around transcription factor binding sites}{37}{section.3.3} -\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{39}{section.3.4} +\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{42}{section.3.4} \contentsline {section}{\numberline {3.5}CTCF and JunD interactomes}{43}{section.3.5} \contentsline {section}{\numberline {3.6}EBF1 binds nucleosomes}{47}{section.3.6} \contentsline {section}{\numberline {3.7}Discussion}{50}{section.3.7} \contentsline {section}{\numberline {3.8}Methods}{50}{section.3.8} \contentsline {subsection}{\numberline {3.8.1}Data and data processing}{50}{subsection.3.8.1} \contentsline {subsection}{\numberline {3.8.2}Classification of MNase patterns}{51}{subsection.3.8.2} \contentsline {subsection}{\numberline {3.8.3}Quantifying nucleosome array intensity from classification results}{52}{subsection.3.8.3} \contentsline {subsection}{\numberline {3.8.4}Peak colocalization}{53}{subsection.3.8.4} \contentsline {subsection}{\numberline {3.8.5}NDR detection}{54}{subsection.3.8.5} \contentsline {subsection}{\numberline {3.8.6}CTCF and JunD interactors}{56}{subsection.3.8.6} \contentsline {subsection}{\numberline {3.8.7}EBF1 and nucleosome}{57}{subsection.3.8.7} \contentsline 
{chapter}{\numberline {4}SPar-K}{59}{chapter.4} \contentsline {section}{\numberline {4.1}Algorithm}{59}{section.4.1} \contentsline {section}{\numberline {4.2}Implementation}{60}{section.4.2} -\contentsline {section}{\numberline {4.3}Benchmarking}{61}{section.4.3} -\contentsline {subsection}{\numberline {4.3.1}K-means}{61}{subsection.4.3.1} +\contentsline {section}{\numberline {4.3}Benchmarking}{64}{section.4.3} +\contentsline {subsection}{\numberline {4.3.1}K-means}{64}{subsection.4.3.1} \contentsline {subsection}{\numberline {4.3.2}ChIPPartitioning}{64}{subsection.4.3.2} \contentsline {subsection}{\numberline {4.3.3}Data}{64}{subsection.4.3.3} \contentsline {subsection}{\numberline {4.3.4}Performances}{65}{subsection.4.3.4} \contentsline {section}{\numberline {4.4}Partition of DNase and MNase data}{65}{section.4.4} -\contentsline {section}{\numberline {4.5}Conclusions}{65}{section.4.5} +\contentsline {section}{\numberline {4.5}Conclusions}{68}{section.4.5} \contentsline {chapter}{\numberline {5}SMiLE-seq data analysis}{69}{chapter.5} \contentsline {chapter}{SMiLE-seq data analysis}{69}{chapter.5} \contentsline {section}{\numberline {5.1}Introduction}{69}{section.5.1} \contentsline {section}{\numberline {5.2}Hidden Markov Model Motif discovery}{71}{section.5.2} \contentsline {section}{\numberline {5.3}Binding motif evaluation}{72}{section.5.3} \contentsline {section}{\numberline {5.4}Results}{73}{section.5.4} \contentsline {section}{\numberline {5.5}Conclusions}{75}{section.5.5} \contentsline {chapter}{\numberline {6}PWMScan}{77}{chapter.6} \contentsline {section}{\numberline {6.1}Algorithms}{77}{section.6.1} \contentsline {subsection}{\numberline {6.1.1}Scanner algorithm}{78}{subsection.6.1.1} \contentsline {subsection}{\numberline {6.1.2}Matches enumeration and mapping}{78}{subsection.6.1.2} \contentsline {section}{\numberline {6.2}PMWScan architecture}{79}{section.6.2} \contentsline {section}{\numberline {6.3}Benchmark}{81}{section.6.3} \contentsline 
{section}{\numberline {6.4}Conclusions}{83}{section.6.4} \contentsline {chapter}{\numberline {7}Chromatin accessibility of monocytes}{85}{chapter.7} \contentsline {section}{\numberline {7.1}Monitoring TF binding}{85}{section.7.1} \contentsline {section}{\numberline {7.2}The advent of single cell DGF}{86}{section.7.2} \contentsline {section}{\numberline {7.3}Open issues}{86}{section.7.3} \contentsline {section}{\numberline {7.4}Data}{86}{section.7.4} \contentsline {section}{\numberline {7.5}Identifying over-represented signals}{87}{section.7.5} \contentsline {subsection}{\numberline {7.5.1}ChIPPartitioning algorithm}{87}{subsection.7.5.1} \contentsline {subsection}{\numberline {7.5.2}EMSequence algorithm}{87}{subsection.7.5.2} \contentsline {subsubsection}{without shift and flip}{89}{figure.caption.35} \contentsline {subsubsection}{with shift and flip}{89}{equation.7.5.2} \contentsline {subsection}{\numberline {7.5.3}EMJoint algorithm}{91}{subsection.7.5.3} \contentsline {subsection}{\numberline {7.5.4}Data realignment}{92}{subsection.7.5.4} \contentsline {subsection}{\numberline {7.5.5}Soft aggregation plots}{92}{subsection.7.5.5} \contentsline {section}{\numberline {7.6}Data processing}{93}{section.7.6} \contentsline {section}{\numberline {7.7}Results}{93}{section.7.7} \contentsline {subsection}{\numberline {7.7.1}Aligning the binding sites}{93}{subsection.7.7.1} \contentsline {subsection}{\numberline {7.7.2}Exploring individual TF classes}{95}{subsection.7.7.2} \contentsline {section}{\numberline {7.8}Discussions}{97}{section.7.8} \contentsline {section}{\numberline {7.9}Perspectives}{97}{section.7.9} \contentsline {section}{\numberline {7.10}Methods}{98}{section.7.10} \contentsline {subsection}{\numberline {7.10.1}Code availability}{98}{subsection.7.10.1} \contentsline {subsection}{\numberline {7.10.2}Data sources}{99}{subsection.7.10.2} \contentsline {subsection}{\numberline {7.10.3}Data post-processing}{99}{subsection.7.10.3} \contentsline 
{subsection}{\numberline {7.10.4}Model extension}{100}{subsection.7.10.4} \contentsline {subsection}{\numberline {7.10.5}Extracting data assigned to a class}{100}{subsection.7.10.5} \contentsline {subsection}{\numberline {7.10.6}Programs}{103}{subsection.7.10.6} \contentsline {subsection}{\numberline {7.10.7}Fragment classes}{104}{subsection.7.10.7} \contentsline {subsection}{\numberline {7.10.8}Simulated sequences}{105}{subsection.7.10.8} \contentsline {subsection}{\numberline {7.10.9}Binding site prediction}{105}{subsection.7.10.9} \contentsline {subsection}{\numberline {7.10.10}Realignment using JASPAR motifs}{106}{subsection.7.10.10} \contentsline {subsection}{\numberline {7.10.11}Per TF sub-classes}{108}{subsection.7.10.11} \contentsline {chapter}{\numberline {8}Discussion}{111}{chapter.8} \contentsline {chapter}{Discussions}{111}{chapter.8} \vspace {\normalbaselineskip } \contentsline {chapter}{\numberline {A}Supplementary material}{115}{appendix.A} \contentsline {section}{\numberline {A.1}ENCODE peaks analysis supplementary material}{116}{section.A.1} \contentsline {section}{\numberline {A.2}SPar-K supplementary material}{126}{section.A.2} \contentsline {section}{\numberline {A.3}SMiLE-seq supplementary material}{139}{section.A.3} \contentsline {section}{\numberline {A.4}Chromatin accessibility of monocytes supplementary material}{139}{section.A.4} \contentsline {subsection}{\numberline {A.4.1}Fragment size analysis}{139}{subsection.A.4.1} \contentsline {subsection}{\numberline {A.4.2}Measuring open chromatin and nucleosome occupancy}{140}{subsection.A.4.2} \contentsline {subsection}{\numberline {A.4.3}Evaluation of EMSequence and ChIPPartitioning}{143}{subsection.A.4.3} \contentsline {subsubsection}{EMSequence}{143}{subsection.A.4.3} \contentsline {subsubsection}{ChIPPartitioning}{146}{figure.caption.56} \contentsline {subsection}{\numberline {A.4.4}Other supplementary figures}{149}{subsection.A.4.4} \contentsline {chapter}{Bibliography}{153}{section*.64} 
\contentsline {chapter}{Bibliography}{165}{appendix*.65} \contentsline {chapter}{Curriculum Vitae}{167}{section*.66} diff --git a/scripts/ch_atac-seq/figure_ctcf_6classes.R b/scripts/ch_atac-seq/figure_ctcf_6classes.R index ce64495..50025c6 100644 --- a/scripts/ch_atac-seq/figure_ctcf_6classes.R +++ b/scripts/ch_atac-seq/figure_ctcf_6classes.R @@ -1,172 +1,172 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) # functions source(file.path("scripts", "functions.R")) # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") # paths data.dir = file.path("/", "local", "groux", "scATAC-seq", "results") dest.dir = file.path("/", "local", "groux", "phd_thesis", "scATAC-seq", "figures") # colors col = brewer.pal(3, "Set1") ##################################### ctcf with flip only ##################################### # open chromatin data = read.read.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_0", "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_model.mat")) model.open = data$models model.prob = data$prob data = NULL # nucleosomes model.nucl = read.read.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_0", "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_nucleosomes_fragment_center_model.mat"))$models # sequence model.seq = read.sequence.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_0", "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_sequences_model.mat"))$models png(filename=file.path(dest.dir, "ctcf_motifs_6class_noshift_flip.png"), - units="in", res=720, width=18, height=8) + units="in", res=480, width=18, height=8) m = matrix(1:6, nrow=3, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = 
model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) x.at = (x.lab + ncol(ref.open)) / 2 axis(1, at=x.at, labels=x.lab) # y-axis is [0,2] because these are bits but # label it [0,1] for min/max signal x.at = seq(0, 2, 1) axis(2, at=x.at, labels=0.5*x.at) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=2, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=2, col=col[2]) } row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(1/3))+0.1 top = bottom + 0.2 par(fig=c(left, right, bottom, top), new=T) idx = 380:420 plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=2, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=2, col=col[2]) # xaxis x.at = 1:length(idx) axis(1, at=x.at, labels=x.at) # yaxis x.at = seq(0, 2, by=1) axis(2, at=x.at, labels=x.at) row_n = row_n + 1 if(i %% 3 == 0) { col_n = col_n + 1 row_n = 1 } } dev.off() ##################################### ctcf with flip and shift ##################################### # open chromatin data = read.read.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_1", "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_model.mat")) model.open = data$models model.prob = data$prob data = NULL # nucleosomes model.nucl = read.read.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_1", "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_1nucl_fragment_center_model.mat"))$models # sequence model.seq = read.sequence.models(file.path(data.dir, 
"10xgenomics_PBMC_5k_motifs_classification_1", "ctcf_motifs_10e-6_open_bin1bp_read_atac_6class_sequences_model.mat"))$models png(filename=file.path(dest.dir, "ctcf_motifs_6class_shift_flip.png"), - units="in", res=720, width=18, height=8) + units="in", res=480, width=18, height=8) m = matrix(1:6, nrow=3, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) x.at = (x.lab + ncol(ref.open)) / 2 axis(1, at=x.at, labels=x.lab) # y-axis is [0,2] because these are bits but # label it [0,1] for min/max signal x.at = seq(0, 2, 1) axis(2, at=x.at, labels=0.5*x.at) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=2, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=2, col=col[2]) } row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(1/3))+0.1 top = bottom + 0.2 par(fig=c(left, right, bottom, top), new=T) idx = 380:420 plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=2, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=2, col=col[2]) # xaxis x.at = 1:length(idx) axis(1, at=x.at, labels=x.at) # yaxis x.at = seq(0, 2, by=1) axis(2, at=x.at, labels=x.at) row_n = row_n + 1 if(i %% 3 == 0) { col_n = col_n + 1 row_n = 1 } } dev.off() diff --git a/scripts/ch_atac-seq/figure_ctcf_sp1_myc_ebf1_footprint.R 
b/scripts/ch_atac-seq/figure_ctcf_sp1_myc_ebf1_footprint.R index a3ad4dd..6d68585 100644 --- a/scripts/ch_atac-seq/figure_ctcf_sp1_myc_ebf1_footprint.R +++ b/scripts/ch_atac-seq/figure_ctcf_sp1_myc_ebf1_footprint.R @@ -1,109 +1,109 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) # functions source(file.path("scripts", "functions.R")) data.dir = file.path("/", "local", "groux", "scATAC-seq", "data", "10xgenomics_PBMC_5k_motifs") dest.dir = file.path("/", "local", "groux", "phd_thesis", "scATAC-seq", "figures") # CTCF data ## open chromatin ctcf.open.1.atac = as.matrix(read.table(file.path(data.dir, "ctcf_motifs_10e-6_open_bin1bp_read_atac.mat"))) ## nucleosomes ctcf.nucl.1.cent = as.matrix(read.table(file.path(data.dir, "ctcf_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"))) ctcf.open.1.atac = colMeans(ctcf.open.1.atac) / max(colMeans(ctcf.open.1.atac)) ctcf.nucl.1.cent = colMeans(ctcf.nucl.1.cent) / max(colMeans(ctcf.nucl.1.cent)) # SP1 data ## open chromatin sp1.open.1.atac = as.matrix(read.table(file.path(data.dir, "sp1_motifs_10e-7_open_bin1bp_read_atac.mat"))) ## nucleosomes sp1.nucl.1.cent = as.matrix(read.table(file.path(data.dir, "sp1_motifs_10e-7_nucleosomes_bin1bp_fragment_center.mat"))) sp1.open.1.atac = colMeans(sp1.open.1.atac) / max(colMeans(sp1.open.1.atac)) sp1.nucl.1.cent = colMeans(sp1.nucl.1.cent) / max(colMeans(sp1.nucl.1.cent)) # myc data ## open chromatin myc.open.1.atac = as.matrix(read.table(file.path(data.dir, "myc_motifs_10e-6_open_bin1bp_read_atac.mat"))) ## nucleosomes myc.nucl.1.cent = as.matrix(read.table(file.path(data.dir, "myc_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"))) myc.open.1.atac = colMeans(myc.open.1.atac) / max(colMeans(myc.open.1.atac)) myc.nucl.1.cent = colMeans(myc.nucl.1.cent) / max(colMeans(myc.nucl.1.cent)) # EBF1 data ## open chromatin ebf1.open.1.atac = as.matrix(read.table(file.path(data.dir, "ebf1_motifs_10e-6_open_bin1bp_read_atac.mat"))) ## nucleosomes 
ebf1.nucl.1.cent = as.matrix(read.table(file.path(data.dir, "ebf1_motifs_10e-6_nucleosomes_bin1bp_fragment_center.mat"))) ebf1.open.1.atac = colMeans(ebf1.open.1.atac) / max(colMeans(ebf1.open.1.atac)) ebf1.nucl.1.cent = colMeans(ebf1.nucl.1.cent) / max(colMeans(ebf1.nucl.1.cent)) # colors col = brewer.pal(4, "Set1") # display center only idx = 200:600 # x-axis axis.at = seq(-200, 200, length.out=3) + 200 axis.lab = seq(-200, 200, by=200) y.lim = c(0,1) # X11(width=18, height=9) png(filename=file.path(dest.dir, "ctcf_sp1_myc_ebf1_footprint.png"), - units="in", res=720, width=18, height=9) + units="in", res=480, width=18, height=9) m = matrix(nrow=2, ncol=2, data=c(1,3, 2,4), byrow=T) l = layout(mat=m, widths=c(1,1), heights=c(1,1)) p = par(mar=c(5.1, 5.1, 4.1, 2.1)) # CTCF plot(ctcf.open.1.atac[idx], col=col[1], lwd=3, type='l', main="CTCF motif", xlab="pos[bp]", ylab="Prop max. signal", xaxt='n', ylim=y.lim, cex.axis=2, cex.lab=2, cex.main=1.8) lines(ctcf.nucl.1.cent[idx], col=col[2], lwd=3) axis(side=1, at=axis.at, labels=axis.lab, cex.axis=1.8) abline(v=191, lty=2, lwd=3) abline(v=210, lty=2, lwd=3) # SP1 plot(sp1.open.1.atac[idx], col=col[1], lwd=3, type='l', main="SP1 motif", xlab="pos[bp]", ylab="Prop max. signal", xaxt='n', ylim=y.lim, cex.axis=2, cex.lab=2, cex.main=1.8) lines(sp1.nucl.1.cent[idx], col=col[2], lwd=3) axis(side=1, at=axis.at, labels=axis.lab, cex.axis=1.8) abline(v=194, lty=2, lwd=3) abline(v=210, lty=2, lwd=3) # myc plot(myc.open.1.atac[idx], col=col[1], lwd=3, type='l', main="myc motif", xlab="pos[bp]", ylab="Prop max. signal", xaxt='n', ylim=y.lim, cex.axis=2, cex.lab=2, cex.main=1.8) lines(myc.nucl.1.cent[idx], col=col[2], lwd=3) axis(side=1, at=axis.at, labels=axis.lab, cex.axis=1.8) abline(v=195, lty=2, lwd=3) abline(v=209, lty=2, lwd=3) # EBF1 plot(ebf1.open.1.atac[idx], col=col[1], lwd=3, type='l', main="EBF1 motif", xlab="pos[bp]", ylab="Prop max. 
signal", xaxt='n', ylim=y.lim, cex.axis=2, cex.lab=2, cex.main=1.8) lines(ebf1.nucl.1.cent[idx], col=col[2], lwd=3) axis(side=1, at=axis.at, labels=axis.lab, cex.axis=1.8) abline(v=197, lty=2, lwd=3) abline(v=206, lty=2, lwd=3) dev.off() - \ No newline at end of file + diff --git a/scripts/ch_atac-seq/figure_sp1_6classes.R b/scripts/ch_atac-seq/figure_sp1_6classes.R index 59e1a7e..32f2a22 100644 --- a/scripts/ch_atac-seq/figure_sp1_6classes.R +++ b/scripts/ch_atac-seq/figure_sp1_6classes.R @@ -1,172 +1,172 @@ setwd(file.path("/", "local", "groux", "scATAC-seq")) # libraries library(RColorBrewer) # functions source(file.path("scripts", "functions.R")) # path to the images for the logo path.a = file.path("res/A.png") path.c = file.path("res/C.png") path.g = file.path("res/G.png") path.t = file.path("res/T.png") # paths data.dir = file.path("/", "local", "groux", "scATAC-seq", "results") -dest.dir = file.path("/", "local", "groux", "phd_thesis", "scATAC-seq", "figures") +dest.dir = file.path("/", "local", "groux", "phd_thesis", "images", "ch_atac-seq") # colors col = brewer.pal(3, "Set1") ##################################### sp1 with flip only ##################################### # open chromatin data = read.read.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_0", "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_model.mat")) model.open = data$models model.prob = data$prob data = NULL # nucleosomes model.nucl = read.read.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_0", "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_nucleosomes_fragment_center_model.mat"))$models # sequence model.seq = read.sequence.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_0", "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_sequences_model.mat"))$models png(filename=file.path(dest.dir, "sp1_motifs_6class_noshift_flip.png"), - units="in", res=720, width=18, height=8) + units="in", res=480, width=18, height=8) m = matrix(1:6, 
nrow=3, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) x.at = (x.lab + ncol(ref.open)) / 2 axis(1, at=x.at, labels=x.lab) # y-axis is [0,2] because these are bits but # label it [0,1] for min/max signal x.at = seq(0, 2, 1) axis(2, at=x.at, labels=0.5*x.at) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=2, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=2, col=col[2]) } row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(1/3))+0.1 top = bottom + 0.2 par(fig=c(left, right, bottom, top), new=T) idx = 380:420 plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=2, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=2, col=col[2]) # xaxis x.at = 1:length(idx) axis(1, at=x.at, labels=x.at) # yaxis x.at = seq(0, 2, by=1) axis(2, at=x.at, labels=x.at) row_n = row_n + 1 if(i %% 3 == 0) { col_n = col_n + 1 row_n = 1 } } dev.off() ##################################### sp1 with flip and shift ##################################### # open chromatin data = read.read.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_model.mat")) model.open = data$models model.prob = data$prob data = NULL # nucleosomes model.nucl = read.read.models(file.path(data.dir, 
"10xgenomics_PBMC_5k_motifs_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_1nucl_fragment_center_model.mat"))$models # sequence model.seq = read.sequence.models(file.path(data.dir, "10xgenomics_PBMC_5k_motifs_classification_1", "sp1_motifs_10e-7_open_bin1bp_read_atac_6class_sequences_model.mat"))$models png(filename=file.path(dest.dir, "sp1_motifs_6class_shift_flip.png"), - units="in", res=720, width=18, height=8) + units="in", res=480, width=18, height=8) m = matrix(1:6, nrow=3, ncol=2, byrow=F) layout(m) # order from most to least probable class ord = order(model.prob, decreasing=T) ref.open = model.open[ord,, drop=F] ref.nucl = model.nucl[ord,, drop=F] ref.seq = model.seq[,,ord, drop=F] prob = model.prob[ord] class = c(1:nrow(ref.open))[ord] for(i in 1:nrow(ref.open)) { # plot logo plot.logo(ref.seq[,,i], path.a, path.c, path.g, path.t, main=sprintf("class %d (p=%.2f)", class[i], prob[i])) # x-axis x.lab = seq(-ncol(ref.open), ncol(ref.open), length.out=3) x.at = (x.lab + ncol(ref.open)) / 2 axis(1, at=x.at, labels=x.lab) # y-axis is [0,2] because these are bits but # label it [0,1] for min/max signal x.at = seq(0, 2, 1) axis(2, at=x.at, labels=0.5*x.at) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,] / max(ref.open[i,])), lwd=2, col=col[1]) lines(2*(ref.nucl[i,] / max(ref.nucl[i,])), lwd=2, col=col[2]) } row_n = 1 # row counter col_n = 1 # column counter for(i in 1:nrow(ref.open)) { # plot logo center right = 0.5*col_n - 0.01 left = right - 0.2 bottom = 1-(row_n*(1/3))+0.1 top = bottom + 0.2 par(fig=c(left, right, bottom, top), new=T) idx = 380:420 plot.logo(ref.seq[,idx,i], path.a, path.c, path.g, path.t) # plot signal (multiplies by 2 because the y-axis goes to 2 bits) lines(2*(ref.open[i,idx] / max(ref.open[i,])), lwd=2, col=col[1]) lines(2*(ref.nucl[i,idx] / max(ref.nucl[i,])), lwd=2, col=col[2]) # xaxis x.at = 1:length(idx) axis(1, at=x.at, labels=x.at) # yaxis x.at = seq(0, 2, by=1) axis(2, 
at=x.at, labels=x.at) row_n = row_n + 1 if(i %% 3 == 0) { col_n = col_n + 1 row_n = 1 } } dev.off() diff --git a/scripts/ch_spark/figures.R b/scripts/ch_spark/figures.R new file mode 100644 index 0000000..ba789f4 --- /dev/null +++ b/scripts/ch_spark/figures.R @@ -0,0 +1,1073 @@ +# REDO THE FIGURES FROM THE ARTICLE BUT IN PNG FORMAT INSTEAD OF PDF, WITH LOWER RESOLUTION +# IT IS AN ADAPTED COPY/PASTE FROM /local/groux/Kmeans_chipseq/bin/article/figures.R + +setwd(file.path("", "local", "groux", "Kmeans_chipseq")) + +library(RColorBrewer) +library(plotrix) + + + + +# ===================================================== Supplemental Figure 1 ===================================================== +# plot the class profiles of the simulated classes and one example of a dataset with low noise, one example with high noise +# and their two best partitions + +dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark") + +source(file.path("res", "functions_utility.R")) +source(file.path("res", "functions_plot.R")) + +# class densities +# general parameter +n_samples = 1000 +n_col = 2001 # the length of a signal vector +shift_max = 100 # the maximum possible shift +p_flip = 0.3 # the prob of having a flipped signal +# class 1 : a simple gaussian +class1_n = 600 +class1_m = ceiling(n_col/2) - ceiling(shift_max/2) # class 1 mean, mean will be in average in the middle of the data vector +class1_s = 40 # class 1 sd +# the signal shape +shape1 = dnorm(1:(n_col-shift_max+1), class1_m, class1_s) +# class 2 : half a gaussian +class2_n = n_samples - class1_n +class2_m = floor(n_col/2) - floor(shift_max/2) # class 2 mean, mean will be in average in the middle of the data vector +class2_s = 40 # class 2 sd +# the signal shape +shape2 = dnorm(1:(n_col-shift_max+1), class2_m, class2_s) +shape2[class2_m:length(shape2)] = min(shape2) +# class 3 : a uniform +class3_n = 333 +class3_from = floor(n_col/2) - floor(shift_max/2) -120 # class 3 from, mean will be in average in 
the middle of the data vector +class3_to = floor(n_col/2) - floor(shift_max/2) +120 # class 3 to, mean will be in average in the middle of the data vector +# the signal shape +shape3 = dunif(1:(n_col-shift_max+1), class3_from, class3_to) +# normalize each class shape so that its values sum to 1 +shape1 = shape1 / sum(shape1) +shape2 = shape2 / sum(shape2) +shape3 = shape3 / sum(shape3) + +# two datasets and an example of partitioning using SPar-K +labels = as.matrix(read.table("data/simulated_data_chipseq/simulated_data_3_class_asym_classes_cov100_noise0.0.txt"))[,1] +# coverage 100, noise 0 +data.100.0 = as.matrix(read.table(file.path("data", "simulated_data_chipseq", "simulated_data_3_class_asym_cov100_noise0.0.txt"))) +ari = read.RDS(file.path("results", "simulated_data_chipseq", "app", "simulated_data_chipseq_3_class_asym_ari_newkmean.RDS")) +best = which.max(ari$`kmean++`$nooutlier$`cov 100`$`noise 0.0`$`3 cluster`) +data.100.0.part = read.table(file.path("results", + "simulated_data_chipseq", + "app", + "seeding_kmean++", + sprintf("simulated_data_3_class_asym_cov100_noise0.0_3cluster_flip_normcorr_%d.txt", best)), + header=T) +data.100.0.part = realign.data(data.100.0, data.100.0.part$shift_ref, data.100.0.part$shift_dat, data.100.0.part$flip, 71) + +# coverage 100, noise 90 +data.100.9 = as.matrix(read.table(file.path("data", "simulated_data_chipseq", "simulated_data_3_class_asym_cov100_noise0.9.txt"))) +best = which.max(ari$`kmean++`$nooutlier$`cov 100`$`noise 0.9`$`3 cluster`) +data.100.9.part = read.table(file.path("results", + "simulated_data_chipseq", + "app", + "seeding_kmean++", + sprintf("simulated_data_3_class_asym_cov100_noise0.9_3cluster_flip_normcorr_%d.txt", best)), + header=T) +data.100.9.part = realign.data(data.100.9, data.100.9.part$shift_ref, data.100.9.part$shift_dat, data.100.9.part$flip, 71) + + +col = brewer.pal(3, "Set1") +col.heat = colorRampPalette(c("white", "red"), space = "rgb")(100) +col.lab = c(rep(col[1], table(labels)[1]), + rep(col[2], table(labels)[2]), + rep(col[3],
table(labels)[3])) + +x.lab = seq(-1000, 1000, length.out=5) +x.at = seq(0, 1, length.out=length(x.lab)) + +# pdf(file=file.path("results", "article", "supplemental_figure1.pdf"), width=14, height=7) +png(filename=file.path(dest.dir, "supplemental_figure1.png"), + width=14, height=7, units="in", res=300) + par(mar=c(5.1, 6.1, 4.1, 2.1)) + + lay = layout(mat=matrix(c(1,4,5, 8, 9, + 1,4,5, 8, 9, + 2,4,5, 8, 9, + 2,6,7,10,11, + 3,6,7,10,11, + 3,6,7,10,11), nrow=6, ncol=5, byrow=T),widths=c(5,0.5,5,0.5,5)) + # layout.show(lay) + + # class 1 density + x = 1:length(shape1) + plot(x, shape1, lwd=3, type='l', col=col[1], main="Class 1 density", xlab="position [bp]", ylab="density", + cex.main=2, cex.axis=2, cex.lab=2) + text(x=-200, y=0.0125, labels='A', cex=4.5, xpd=NA, font=2) + # class 2 density + plot(x, shape2, lwd=3, type='l', col=col[2], main="Class 2 density", xlab="position [bp]", ylab="density", + cex.main=2, cex.axis=2, cex.lab=2) + # class 3 density + plot(x, shape3, lwd=3, type='l', col=col[3], main="Class 3 density", xlab="position [bp]", ylab="density", + cex.main=2, cex.axis=2, cex.lab=2) + + # dataset coverage 100 noise 0 + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(labels, lwd=2, colors=col.lab) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(data.100.0)), col=col.heat, xaxt='n', yaxt='n', main="Coverage 100, noise 0%", ylab="", xlab="position (bp)", + cex.main=2.0, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.07, y=1.1, labels='B', cex=4.5, xpd=NA, font=2) + + # dataset coverage 100 noise 0.9 + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(labels, lwd=2, colors=col.lab) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(data.100.9)), col=col.heat, xaxt='n', yaxt='n', main="Coverage 100, noise 90%", ylab="", xlab="position (bp)", + cex.main=2.0, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.07, y=1.1, 
labels='D', cex=4.5, xpd=NA, font=2) + + # partition of dataset coverage 100 noise 0 + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(labels, lwd=2, colors=col.lab) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(data.100.0.part)), col=col.heat, xaxt='n', yaxt='n', main="Coverage 100, noise 0%", ylab="", xlab="Approximated pos. (bp)", + cex.main=2.0, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.07, y=1.1, labels='C', cex=4.5, xpd=NA, font=2) + + # partition dataset coverage 100 noise 0.9 + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(labels, lwd=2, colors=col.lab) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(data.100.9.part)), col=col.heat, xaxt='n', yaxt='n', main="Coverage 100, noise 90%", ylab="", xlab="Approximated pos. (bp)", + cex.main=2.0, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.07, y=1.1, labels='E', cex=4.5, xpd=NA, font=2) +dev.off() + +rm(list=ls()) + + + + +# ===================================================== Supplemental Figure 2 ===================================================== +# plot the Adjuted Rand Index values for all programs, measured on the simulated data with different coverages, background to +# noise ratios and containing 3 classes. 
+# supplemental figure 1 : results when clustering the data with random seeding +# supplemental figure 2 : results when clustering the data with kmean++ seeding + +dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark") +source(file.path("res", "functions_utility.R")) + + + +# some colors +colors = brewer.pal(9, "Set1") + +# coverages and signal/noise ratios used to simulate the data +coverages = c(10, 50, 100) +noises = c(0.0, 0.1, 0.5, 0.9) + +# load Adjusted Rand Index measured +ari.kmean.new = read.RDS(file.path("results", + "simulated_data_chipseq", + "app", + "simulated_data_chipseq_3_class_asym_ari_newkmean.RDS")) +ari.kmean.reg = read.RDS(file.path("results", "simulated_data_chipseq", + "kmean", + "simulated_data_chipseq_3_class_asym_ari_kmean.RDS")) +ari.chippart = read.RDS(file.path("results", + "simulated_data_chipseq", + "chippartitioning", + "simulated_data_chipseq_3_class_asym_ari_chippartitioning.RDS")) +ari.shuf = read.RDS(file.path("results", + "simulated_data_chipseq", + "simulated_data_chipseq_3_class_asym_gamma_shuffled.RDS")) + +# pdf(file=file.path("results", "article", "supplemental_figure2.pdf"), width=16, height=7) +png(filename=file.path(dest.dir, "supplemental_figure2.png"), + width=16, height=7, units="in", res=300) + par(mar=c(5.1, 6.1, 4.1, 2.1)) + + colors.boxplot = c(rep(c(colors[1], + colors[5], + colors[2:4]), + each=12), + colors[7]) + boxplot( + # new K-means + # random seeding + # normal + # coverage 10 + ari.kmean.new[["random"]][["normal"]][["cov 10"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.new[["random"]][["normal"]][["cov 10"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.new[["random"]][["normal"]][["cov 10"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.new[["random"]][["normal"]][["cov 10"]][["noise 0.9"]][["3 cluster"]], + # coverage 50 + ari.kmean.new[["random"]][["normal"]][["cov 50"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.new[["random"]][["normal"]][["cov 50"]][["noise 0.1"]][["3
cluster"]], + ari.kmean.new[["random"]][["normal"]][["cov 50"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.new[["random"]][["normal"]][["cov 50"]][["noise 0.9"]][["3 cluster"]], + # coverage 100 + ari.kmean.new[["random"]][["normal"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.new[["random"]][["normal"]][["cov 100"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.new[["random"]][["normal"]][["cov 100"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.new[["random"]][["normal"]][["cov 100"]][["noise 0.9"]][["3 cluster"]], + + # new K-means + # random seeding + # nooutlier + # coverage 10 + ari.kmean.new[["random"]][["nooutlier"]][["cov 10"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.new[["random"]][["nooutlier"]][["cov 10"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.new[["random"]][["nooutlier"]][["cov 10"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.new[["random"]][["nooutlier"]][["cov 10"]][["noise 0.9"]][["3 cluster"]], + # coverage 50 + ari.kmean.new[["random"]][["nooutlier"]][["cov 50"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.new[["random"]][["nooutlier"]][["cov 50"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.new[["random"]][["nooutlier"]][["cov 50"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.new[["random"]][["nooutlier"]][["cov 50"]][["noise 0.9"]][["3 cluster"]], + # coverage 100 + ari.kmean.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.9"]][["3 cluster"]], + + # regular K-means + # random seeding + # euclidean distance + # (random seeding, not kmean++) + # coverage 10 + ari.kmean.reg[["random"]][["eucl"]][["cov 10"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.reg[["random"]][["eucl"]][["cov 10"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.reg[["random"]][["eucl"]][["cov 10"]][["noise 0.5"]][["3
cluster"]], + ari.kmean.reg[["random"]][["eucl"]][["cov 10"]][["noise 0.9"]][["3 cluster"]], + # coverage 50 + ari.kmean.reg[["random"]][["eucl"]][["cov 50"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.reg[["random"]][["eucl"]][["cov 50"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.reg[["random"]][["eucl"]][["cov 50"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.reg[["random"]][["eucl"]][["cov 50"]][["noise 0.9"]][["3 cluster"]], + # coverage 100 + ari.kmean.reg[["random"]][["eucl"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.reg[["random"]][["eucl"]][["cov 100"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.reg[["random"]][["eucl"]][["cov 100"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.reg[["random"]][["eucl"]][["cov 100"]][["noise 0.9"]][["3 cluster"]], + + # regular K-means + # random seeding + # corr. distance + # (random seeding, not kmean++) + # coverage 10 + ari.kmean.reg[["random"]][["corr"]][["cov 10"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.reg[["random"]][["corr"]][["cov 10"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.reg[["random"]][["corr"]][["cov 10"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.reg[["random"]][["corr"]][["cov 10"]][["noise 0.9"]][["3 cluster"]], + # coverage 50 + ari.kmean.reg[["random"]][["corr"]][["cov 50"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.reg[["random"]][["corr"]][["cov 50"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.reg[["random"]][["corr"]][["cov 50"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.reg[["random"]][["corr"]][["cov 50"]][["noise 0.9"]][["3 cluster"]], + # coverage 100 + ari.kmean.reg[["random"]][["corr"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + ari.kmean.reg[["random"]][["corr"]][["cov 100"]][["noise 0.1"]][["3 cluster"]], + ari.kmean.reg[["random"]][["corr"]][["cov 100"]][["noise 0.5"]][["3 cluster"]], + ari.kmean.reg[["random"]][["corr"]][["cov 100"]][["noise 0.9"]][["3 cluster"]], + + # ChIPPartitioning + # random seeding + # coverage 10 + ari.chippart[["random"]][["cov 10"]][["noise
0.0"]][["3 cluster"]], + ari.chippart[["random"]][["cov 10"]][["noise 0.1"]][["3 cluster"]], + ari.chippart[["random"]][["cov 10"]][["noise 0.5"]][["3 cluster"]], + ari.chippart[["random"]][["cov 10"]][["noise 0.9"]][["3 cluster"]], + # coverage 50 + ari.chippart[["random"]][["cov 50"]][["noise 0.0"]][["3 cluster"]], + ari.chippart[["random"]][["cov 50"]][["noise 0.1"]][["3 cluster"]], + ari.chippart[["random"]][["cov 50"]][["noise 0.5"]][["3 cluster"]], + ari.chippart[["random"]][["cov 50"]][["noise 0.9"]][["3 cluster"]], + # coverage 100 + ari.chippart[["random"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + ari.chippart[["random"]][["cov 100"]][["noise 0.1"]][["3 cluster"]], + ari.chippart[["random"]][["cov 100"]][["noise 0.5"]][["3 cluster"]], + ari.chippart[["random"]][["cov 100"]][["noise 0.9"]][["3 cluster"]], + + # Random expectations + ari.shuf, + + main="Adjusted Rand Index", xlab="", ylab="ARI", + xaxt='n', yaxt='n', cex.main=3, cex.axis=2, cex.lab=2, ylim=c(-0.2, 1.4), + col=colors.boxplot) + # y axis + axis(side=2, at=seq(0, 1, by=0.2), cex.axis=1.3) + # add horizontal lines + abline(h=1.0, lty=2) + abline(h=0.5, lty=2) + abline(h=0.0, lty=2) + # draw noise values + # parameters to draw triangles + y_from_tri = -0.05 + y_to_tri = y_from_tri + x_from_tri = 0.5 + x_to_tri = 1 + x_by_tri = length(noises) + 1 + h_tri = 0.02 + # parameters to draw noise values + x_noise = 1 + y_noise = y_from_tri - 0.05 + x_by_noise = 1 + for(i in 1:5) + { for(j in 1:length(coverages)) + { x_to_tri = x_from_tri + x_by_tri - 1 + polygon(x=c(x_from_tri, x_to_tri, x_to_tri), y=c(y_from_tri, y_to_tri, y_to_tri+h_tri), col="black") + for(k in 1:length(noises)) + { + text(x=x_noise, y=y_noise, labels=noises[k], cex=0.8) + x_noise = x_noise + x_by_noise + } + abline(v=x_from_tri, lty=2) + x_from_tri = x_to_tri + } + } + abline(v=x_from_tri, lty=2) + # label the random values + text(x=x_noise, + y=y_noise, + labels="R") + # draw coverage values + y_cov = y_from_tri - 0.1 + 
y_cov_text = y_cov - 0.05 + x_from_cov = 1 + x_to_cov = 1 + x_by_cov = length(noises) + for(i in 1:5) + { for(j in 1:length(coverages)) + { x_to_cov = x_from_cov + x_by_cov - 1 + segments(x0=x_from_cov, x1=x_to_cov, y0=y_cov, y1=y_cov, lwd=3) + text(x=x_from_cov + 0.5*(x_by_cov-1), y=y_cov_text, labels=sprintf("cov %d", coverages[j])) + x_from_cov = x_to_cov + 1 + } + } + # draw legend + legend(x=50, y=1.52, legend=c("SPar-K", + "SPar-K (smooth.)", + "K-means (eucl.)", + "K-means (corr.)", + "ChIPPartitioning", + "Random partition"), + col=unique(colors.boxplot), + cex=1.2, lwd=4, bty='n') +dev.off() + + +rm(list=ls()) + + + +# ===================================================== Supplemental Figure 4 ===================================================== +# plot the SSE for random and Kmeans++ seedings for simulated ChIP-seq data with 3 classes + +dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark") +source(file.path("res", "functions_utility.R")) + +sse = read.RDS(file.path("results", + "simulated_data_chipseq", + "app", + "simulated_data_chipseq_3_class_asym_sse_newkmean.RDS")) +cov = "cov 100" +noise = "noise 0.0" + +# pdf(file=file.path("results", "article", "supplemental_figure4.pdf"), width=10, height=6) +png(filename=file.path(dest.dir, "supplemental_figure4.png"), + width=10, height=6, units="in", res=300) + + par(mar=c(5.1, 6.1, 4.1, 2.1), mfrow=c(2,2)) + # random seeding, normal + option = "normal" + seeding = "random" + x = 2:5 + m = c( + # given seeding + # coverage 100 + median(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]]) + ) + s = c( + # given seeding + # coverage 100 + sd(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]), + 
sd(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]]) + ) + ylim = c(min(m-s), max(m+s)) + # plot medians + plot(x=x, y=m, + main="", xlab="Nb of clusters", ylab="SSE", + cex.main=3, cex.axis=2, cex.lab=2, lwd=3, type='b', ylim=ylim, + xaxt='n') + axis(side=1, at=x, cex.axis=2) + # plot standard deviations + segments(x0=x, x1=x, y0=m-s, y1=m+s, lwd=2) + # plot label + text(x=1.4, y=ylim[1]+ 1.3*diff(ylim), labels="A", cex=3.5, xpd=NA, font=2) + + + # random seeding, nooutlier + option = "nooutlier" + seeding = "random" + x = 2:5 + m = c( + # given seeding + # coverage 100 + median(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]]) + ) + s = c( + # given seeding + # coverage 100 + sd(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]]) + ) + ylim = c(min(m-s), max(m+s)) + # plot medians + plot(x=x, y=m, + main="", xlab="Nb of clusters", ylab="SSE", + cex.main=3, cex.axis=2, cex.lab=2, lwd=3, type='b', ylim=ylim, + xaxt='n') + axis(side=1, at=x, cex.axis=2) + # plot standard deviations + segments(x0=x, x1=x, y0=m-s, y1=m+s, lwd=2) + # plot label + text(x=1.4, y=ylim[1]+ 1.3*diff(ylim), labels="B", cex=3.5, xpd=NA, font=2) + + + # kmean++ seeding, normal + option = "normal" + seeding = "kmean++" + x = 2:5 + m = c( + # given seeding + # coverage 100 + median(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["5 
cluster"]]) + ) + s = c( + # given seeding + # coverage 100 + sd(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]]) + ) + ylim = c(min(m-s), max(m+s)) + # plot medians + plot(x=x, y=m, + main="", xlab="Nb of clusters", ylab="SSE", + cex.main=3, cex.axis=2, cex.lab=2, lwd=3, type='b', ylim=ylim, + xaxt='n') + axis(side=1, at=x, cex.axis=2) + # plot standard deviations + segments(x0=x, x1=x, y0=m-s, y1=m+s, lwd=2) + # plot label + text(x=1.4, y=ylim[1]+ 1.3*diff(ylim), labels="C", cex=3.5, xpd=NA, font=2) + + + # kmean++ seeding, nooutlier + option = "nooutlier" + seeding = "kmean++" + x = 2:5 + m = c( + # given seeding + # coverage 100 + median(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]), + median(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]]) + ) + s = c( + # given seeding + # coverage 100 + sd(sse[[seeding]][[option]][[cov]][[noise]][["2 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["3 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["4 cluster"]]), + sd(sse[[seeding]][[option]][[cov]][[noise]][["5 cluster"]]) + ) + ylim = c(min(m-s), max(m+s)) + # plot medians + plot(x=x, y=m, + main="", xlab="Nb of clusters", ylab="SSE", + cex.main=3, cex.axis=2, cex.lab=2, lwd=3, type='b', ylim=ylim, + xaxt='n') + axis(side=1, at=x, cex.axis=2) + # plot standard deviations + segments(x0=x, x1=x, y0=m-s, y1=m+s, lwd=2) + # plot label + text(x=1.4, y=ylim[1]+ 1.3*diff(ylim), labels="D", cex=3.5, xpd=NA, font=2) + +dev.off() + + +rm(list=ls()) + + + +# ===================================================== Supplemental Figure 5 ===================================================== +# plot the runtimes for each 
program when clustering the simulated ChIP-seq data with 3 classes + +dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark") +source(file.path("res", "functions_utility.R")) + +times.new = read.RDS(file.path("results", "runtime", "runtimes_app.RDS")) +times.kmean = read.RDS(file.path("results", "runtime", "runtimes_kmean.RDS")) +times.chipp = read.RDS(file.path("results", "runtime", "runtimes_chippartitioning.RDS")) + +data = list(vec1=times.new[["random"]][["normal"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + vec2=times.new[["kmean++"]][["normal"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + vec3=times.new[["random"]][["nooutlier"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + vec4=times.new[["kmean++"]][["nooutlier"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + vec5=times.kmean[["random"]][["eucl"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + vec6=times.kmean[["kmean++"]][["eucl"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + vec7=times.kmean[["random"]][["corr"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + vec8=times.kmean[["kmean++"]][["corr"]][["cov 100"]][["noise 0.0"]][["3 cluster"]], + vec9=times.chipp[["random"]][["cov 100"]][["noise 0.0"]][["3 cluster"]]) + +# some colors: one color per method, the boxes being ordered by method then by seeding (rand, k++) +colors = brewer.pal(9, "Set1") +colors.boxplot = c(rep(colors[1],2), + rep(colors[5],2), + rep(colors[2],2), + rep(colors[3],2), + rep(colors[4],2)) + +# pdf(file=file.path("results", "article", "supplemental_figure5.pdf"), width=10, height=6) +png(filename=file.path(dest.dir, "supplemental_figure5.png"), + width=10, height=6, units="in", res=300) + par(mar=c(6.1, 6.1, 4.1, 2.1), + cex.main=3, + cex.axis=1.5, + cex.lab=2, + xaxt="n") + + p = par(cex.main=3, cex.axis=1.5, cex.lab=2) + + # boxplot with a broken y-axis + gap.boxplot(data, + gap=list(top=c(80,550),bottom=c(NA,NA)), + main="Running times", xlab="", ylab="time (sec)", + col=colors.boxplot) + # x-axis + labels = c(rep(c("rand", "k++"), 4), + "rand") + axis(1, at=1:9, tick=T, labels=FALSE)
+ text(x=1:9, + y=-10, + labels=labels, + srt=45, adj=1, xpd=TRUE, cex=1.8) + # y-axis + axis(2, labels=c(seq(0,80,length.out=5), seq(550,650,length.out=5)), + at=c(seq(0,80,length.out=5), seq(550,650,length.out=5)-(550-60))) + # legend + legend("topleft", + legend = c("SPar-K", + "SPar-K (smooth)", + "Kmeans (eucl)", + "Kmeans (corr)", + "ChiPPartitioning"), + col = unique(colors.boxplot), + cex=1.2, lwd=4, bty='n') + grid() +dev.off() + +rm(list=ls()) + + +# ===================================================== Supplemental Figure 8 ===================================================== +# figure with the MNase data at CTCF binding sites partition +# the partition was obtained by running the clustering with a shift of 41, flip and nooutlier +# the optimal number of clusters was estimated to be 3, the best partition was estimated to be +# the 4th one. Visually, it was providing interesting biological information but was not +# the partition with the lowest SSE for K=3 (but it was not the one with the highest either). NOTE(review): n.cluster is set to 4 further down, not 3 - confirm. + +dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark") +source(file.path("res", "functions_utility.R")) +source(file.path("res", "functions_plot.R")) + +#' Order the rows of a given matrix by similarity +#' (correlation) to the aggregation (in ascending +#' order) and returns the order. +#' @param data the matrix of interest. +#' @return a vector of indices to reorder the +#' original matrix.
+#' @author Romain Groux +get.row.order = function(data) +{ if(is.vector(data)) + { return(c(1)) } + else + { ref = colSums(data) + scores = apply(data, 1, cor, ref) + return(order(scores, decreasing=F)) + } +} + + +# clustering parameters +n.cluster = 4 +n.shift = 41 +flip = TRUE + + +# the data +data = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_mnase_encode.txt"))) +# some additional data (the two DNase replicates are summed) +dnase = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_dnase_encode_rep1.txt"))) + + as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_dnase_encode_rep2.txt"))) +motif = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_ctcfmotif_encode.txt"))) +tss.plus = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_tss_std+_encode.txt"))) +tss.minus = as.matrix(read.table(file.path("data", "data_chipseq", "ctcf_tss_std-_encode.txt"))) +tss = tss.plus + tss.minus +peaks = read.table(file.path("data", "data_chipseq", "ctcfpeak.sga"), header=F, stringsAsFactors=F) + +# cluster 1 aggregation profiles +chipcor.tss.m = read.table(file.path("results", + "ctcf_mnase_encode2", + sprintf("ctcfpeak_mnase_%dclusters_newkmean_nooutlier_4_cluster1_tss-.txt", + n.cluster))) +chipcor.cage.m = read.table(file.path("results", + "ctcf_mnase_encode2", + sprintf("ctcfpeak_mnase_%dclusters_newkmean_nooutlier_4_cluster1_cage-rep1-.txt", + n.cluster))) +chipcor.dnase = read.table(file.path("results", + "ctcf_mnase_encode2", + sprintf("ctcfpeak_mnase_%dclusters_newkmean_nooutlier_4_cluster1_dnase-rep1.txt", + n.cluster))) +chipcor.mnase = read.table(file.path("results", + "ctcf_mnase_encode2", + sprintf("ctcfpeak_mnase_%dclusters_newkmean_nooutlier_4_cluster1_mnase.txt", + n.cluster))) + +# the best partition +results = read.table(file.path("results", "ctcf_mnase_encode2", + sprintf("ctcf_mnase_encode_%dclusters_nooutlier_4.txt", n.cluster)), + header=T) +results = format.results(data, results, n.shift, n.cluster) + + +# x-axis labels +x.lab =
seq(-1000, 1000, length.out=5) +x.at = seq(0, 1, length.out=length(x.lab)) +x.at2 = seq(1, ncol(data), length.out=length(x.lab)) +# heatmap colors +color.1 = colorRampPalette(c("white", "red"), space = "rgb")(100) +color.2 = colorRampPalette(c("white", "blue"), space = "rgb")(100) +# cluster colors +color.lab = brewer.pal(8, "Set1") +# whether a region has a motif +has_motif = apply(motif, 1, sum) +has_motif[which(has_motif > 1)] = 1 +# wheteher a region has a TSS +has_tss = apply(tss, 1, sum) +has_tss[which(has_tss > 1)] = 1 + +# plot +# pdf(file=file.path("results", "article", "supplemental_figure8.pdf"), width=14, height=8) +png(filename=file.path(dest.dir, "supplemental_figure8.png"), + width=14, height=8, units="in", res=300) + + # create matrices with the data and the peaks for the heatmap and a vector of color + # labels to plot the cluster assignment on the side of the heatmap + d = matrix(nrow=nrow(data), ncol=ncol(data)) + p = d + data.aligned = d + motif.aligned = d + dnase.aligned = d + tss.aligned = d + l = vector(mode="character", length=nrow(data)) + from = 1; to = from ; + for(j in 1:n.cluster) + { index = which(results$clusters == j) + to = from + length(index) -1 + d[from:to,] = order.rows(data[index,]) + data.aligned[from:to,] = realign.data(data[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift) + motif.aligned[from:to,] = realign.data(motif[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift) + dnase.aligned[from:to,] = realign.data(dnase[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift) + tss.aligned[from:to,] = realign.data(tss[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift) + order = get.row.order(data.aligned[from:to,]) + data.aligned[from:to,] = data.aligned[from:to,][order,] + motif.aligned[from:to,] = motif.aligned[from:to,][order,] + dnase.aligned[from:to,] = 
dnase.aligned[from:to,][order,] + tss.aligned[from:to,] = tss.aligned[from:to,][order,] + l[from:to] = color.lab[j] + from = to + 1 + } + + + p = par(oma=c(0,0,5,0)) + # layout construction + labels = c(1, 2, 3, 4, 5, 6, 7, 8, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10,11,11,12,12,13,13, + 9, 10,11,11,12,12,13,13) + lay = layout(matrix(data=labels, nrow=4, ncol=8, byrow=T), widths=c(0.5,5,0.5,5,0.5,5,0.5,5,0.5,5,0.5,5)) + # layout.show(lay) + + # data heatmap + # p = par(mar=c(5, 0, 4, 1) + 0.1) + # plot.label.bar(results$clusters, lwd=2, colors=l) + # p = par(mar=c(5, 10, 4, 4) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + # image(t(condense.matrix(d)), col=color.1, xaxt='n', yaxt='n', main="MNase Data", ylab="", xlab="Position (bp)", + # cex.main=2.5, cex.lab=2.5) + # axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + # text(x=-0.08, y=1.15, labels='A', cex=4, xpd=NA, font=2) + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot(1,1, xaxt='n', yaxt='n', col="white", xlab="", ylab="", main="", bty='n') + p = par(mar=c(5, 10, 4, 4) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(order.rows(data))), col=color.1, xaxt='n', yaxt='n', main="MNase Data", ylab="", xlab="Position (bp)", + cex.main=2.5, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.08, y=1.15, labels='A', cex=4, xpd=NA, font=2) + # realigned data heatmap + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(results$clusters, lwd=2, colors=l) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(data.aligned)), col=color.1, xaxt='n', yaxt='n', main="Aligned MNase", ylab="", xlab="Approx. pos. 
(bp)", + cex.main=2.5, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.08, y=1.15, labels='B', cex=4, xpd=NA, font=2) + # realigned DNaseI + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(results$clusters, lwd=2, colors=l) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(dnase.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned DNaseI", ylab="", xlab="Approx. pos. (bp)", + cex.main=2.5, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.08, y=1.15, labels='C', cex=4, xpd=NA, font=2) + # realigned motifs + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(results$clusters, lwd=2, colors=l) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(motif.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned motifs", ylab="", xlab="Approx. pos. (bp)", + cex.main=2.5, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.08, y=1.15, labels='D', cex=4, xpd=NA, font=2) + # realigned TSSs + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(results$clusters, lwd=2, colors=l) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(tss.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned TSSs", ylab="", xlab="Approx. pos. (bp)", + cex.main=2.5, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.08, y=1.15, labels='E', cex=4, xpd=NA, font=2) + + # cluster 2 aggregations + x = chipcor.dnase[,1] + y.dnase = chipcor.dnase[,2] + y.mnase = chipcor.mnase[,2] + y.tss.m = chipcor.tss.m[,2] + y.cage.m = chipcor.cage.m[,2] + p = par(mar=c(5.1,6.1,4.1,2.1)) + plot(x=x, y=y.mnase/max(y.mnase), lwd=3, col=color.lab[2], type='l', + xlab="Approximated position (bp)", + ylab="Prop. 
of max signal", + main="Cluster 1", + ylim=c(0,1.2), + cex.main=2.5, cex.axis=2, cex.lab=2) + lines(x=x, y=y.dnase/max(y.dnase), lwd=3, col=color.lab[1], lty=1) # dnase on both std / at orinted peaks + lines(x=x, y=y.tss.m/max(y.tss.m), lwd=2, col=color.lab[3], lty=1) # tss on - std / at orinted peaks + lines(x=x, y=y.cage.m/max(y.cage.m), lwd=2, col=color.lab[4], lty=1) # cage on - std / at orinted peaks + legend("topright", legend=c("MNase", + "DNaseI", + "TSS -std", + "CAGE -std"), + seg.len=0.5, col=c(color.lab[c(2,1,3,4)]), lwd=c(3,3,2,2), bty="n", cex=1) + text(x=-1100, y=1.42, labels='F', cex=4.5, xpd=NA, font=2) + + # motif proportions + motif_prop = vector(mode="numeric", length=n.cluster) + for(j in 1:n.cluster) + { index = which(results$clusters == j) + motif_prop[j] = sum(has_motif[index]) / length(index) + } + barplot(height=motif_prop, ylim=c(0,1),col=color.lab[1:j], + main="Prop. CTCF motif", xlab="clusters", ylab="Prop. region with motif", + names.arg=1:n.cluster, + cex.main=2.0, cex.lab=2, cex.axis=2) + text(x=-0.08, y=1.15, labels='G', cex=4, xpd=NA, font=2) + + # TSS proportions + tss_prop = vector(mode="numeric", length=n.cluster) + for(j in 1:n.cluster) + { index = which(results$clusters == j) + tss_prop[j] = sum(has_tss[index]) / length(index) + } + barplot(height=tss_prop, ylim=c(0,1),col=color.lab[1:j], + main="Prop. TSS", xlab="clusters", ylab="Prop. 
region with TSS", + names.arg=1:n.cluster, + cex.main=2.0, cex.lab=2, cex.axis=2) + text(x=-0.08, y=1.15, labels='H', cex=4, xpd=NA, font=2) + + par(p) +dev.off() + +rm(list=ls()) + + + + + +# ===================================================== Figure 1 ===================================================== +# figure showing the partition of the DNaseI data at SP1 binding sites +# the partition was obtained by running the clustering with a shift of 41, flip and nooutlier +# The best partition is the 7th which is the 4th partition with the lowest SSE (the 4 lowest SSE +# values are really close to each other, ~1/600 of diff) and it also looks really nice in terms +# of biology, with aligned footprints and clusters looking different + +dest.dir = file.path("", "local", "groux", "phd_thesis", "images", "ch_spark") +source(file.path("res", "functions_utility.R")) +source(file.path("res", "functions_plot.R")) + + +#' Order the rows of a given matrix by similarity +#' (correlation) to the aggregation (column sums), in +#' ascending order, and return the order. +#' @param data the matrix of interest (a plain vector yields 1). +#' @return a vector of indices to reorder the +#' original matrix. 
+#' @author Romain Groux +get.row.order = function(data) +{ if(is.vector(data)) # a vector is handled as a single row + { return(c(1)) } + else + { ref = colSums(data) # aggregation: per-column sums over all rows + scores = apply(data, 1, cor, ref) # correlation of each row with the aggregation + return(order(scores, decreasing=F)) # ascending: lowest correlation first + } +} + + + +# clustering parameters (shift, flip and number of clusters) +n.shift = 41 +flip = TRUE +n.cluster = 3 + +# the data +data = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_dnase_big_clean.txt"))) +# some additional data +mnase = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_mnase_big_clean.txt"))) +motif = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_sp1motif_big_clean.txt"))) +tss = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_tss_big_clean.txt"))) +tss.plus = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_tss_std+_big.txt"))) +tss.minus = as.matrix(read.table(file.path("data", "sp1_dnase", "sp1peak_tss_std-_big.txt"))) +peaks = read.table(file.path("data", "sp1_dnase", "sp1peak_clean.sga"), header=F, stringsAsFactors=F) + +# this is the partition I consider the best +results = read.table(file.path("results", "sp1_dnase3", sprintf("sp1peak_dnase_%dclusters_nooutlier_7.txt", n.cluster)), header=T) +results = format.results(data, results, n.shift, n.cluster) + +# cluster 2 aggregation profiles +chipcor.tss.m = read.table(file.path("results", + "sp1_dnase3", + sprintf("sp1peak_dnase_%dclusters_newkmean_nooutlier_7_cluster2_tss-.txt", + n.cluster))) +chipcor.cage.m = read.table(file.path("results", + "sp1_dnase3", + sprintf("sp1peak_dnase_%dclusters_newkmean_nooutlier_7_cluster2_cage-.txt", + n.cluster))) +chipcor.dnase = read.table(file.path("results", + "sp1_dnase3", + sprintf("sp1peak_dnase_%dclusters_newkmean_nooutlier_7_cluster2_dnase_rep1.txt", + n.cluster))) +chipcor.mnase = read.table(file.path("results", + "sp1_dnase3", + 
sprintf("sp1peak_dnase_%dclusters_newkmean_nooutlier_7_cluster2_mnase.txt", + n.cluster))) + + +x.lab = seq(-300, 300, length.out=5) +x.at = seq(0, 1, length.out=length(x.lab)) +x.at2 = seq(1, 
ncol(data), length.out=length(x.lab)) +# heatmap colors +color.1 = colorRampPalette(c("white", "red"), space = "rgb")(100) +color.2 = colorRampPalette(c("white", "blue"), space = "rgb")(100) +# cluster colors +color.lab = brewer.pal(8, "Set1") +# whether a region has a motif (row sums capped at 1) +has_motif = apply(motif, 1, sum) +has_motif[which(has_motif > 1)] = 1 +# whether a region has a TSS (row sums capped at 1) +has_tss = apply(tss, 1, sum) +has_tss[which(has_tss > 1)] = 1 + +# plot +# pdf(file=file.path("results", "article", "figure1.pdf"), width=14, height=8) +png(filename=file.path(dest.dir, "figure1.png"), + width=14, height=8, units="in", res=300) + # create matrices with the data and the peaks for the heatmap and a vector of color + # labels to plot the cluster assignment on the side of the heatmap + d = matrix(nrow=nrow(data), ncol=ncol(data)) + p = d # NOTE: placeholder only; p is overwritten by the par() calls below + data.aligned = d + motif.aligned = d + mnase.aligned = d + tss.aligned = d + l = vector(mode="character", length=nrow(data)) + from = 1; to = from ; + for(j in 1:n.cluster) + { index = which(results$clusters == j) + to = from + length(index) -1 + d[from:to,] = order.rows(data[index,]) + data.aligned[from:to,] = realign.data(data[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift) + motif.aligned[from:to,] = realign.data(motif[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift) + mnase.aligned[from:to,] = realign.data(mnase[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift) + tss.aligned[from:to,] = realign.data(tss[index,], results$shifts_ref[index], results$shifts_dat[index], results$flips[index], n.shift) + order = get.row.order(data.aligned[from:to,]) + data.aligned[from:to,] = data.aligned[from:to,][order,] + motif.aligned[from:to,] = motif.aligned[from:to,][order,] + mnase.aligned[from:to,] = 
mnase.aligned[from:to,][order,] + tss.aligned[from:to,] = tss.aligned[from:to,][order,] + l[from:to] = color.lab[j] + from = 
to + 1 + } + + p = par(oma=c(0,0,5,0)) + # layout construction + labels = c(1, 2, 3, 4, 5, 6, 7, 8, + 1, 2, 3, 4, 5, 6, 7, 8, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 9,10,10,11,11,11,11, + 9, 9,10,10,11,11,11,11) + lay = layout(matrix(data=labels, nrow=5, ncol=8, byrow=T), widths=c(0.5,5,0.5,5,0.5,5,0.5,5,0.5,5,0.5,5)) + # layout.show(lay) + + # p = par(oma=c(0,0,5,0)) + # # layout construction + # labels = c(1, 2, 3, 4, 5, 6, 7, 8, + # 1, 2, 3, 4, 5, 6, 7, 8, + # 9, 10,11,11,12,12,12,12, + # 9, 10,11,11,12,12,12,12) + # lay = layout(matrix(data=labels, nrow=4, ncol=8, byrow=T), widths=c(0.5,5,0.5,5,0.5,5,0.5,5,0.5,5,0.5,5)) + # layout.show(lay) + + # data heatmap + # p = par(mar=c(5, 0, 4, 1) + 0.1) + # plot.label.bar(results$clusters, lwd=2, colors=l) + # p = par(mar=c(5, 10, 4, 4) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + # image(t(condense.matrix(d)), col=color.1, xaxt='n', yaxt='n', main="DNaseI data", ylab="", xlab="Position (bp)", + # cex.main=2.5, cex.lab=2.5) + # axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + # text(x=-0.07, y=1.1, labels='A', cex=4.5, xpd=NA, font=2) + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot(1,1, xaxt='n', yaxt='n', col="white", xlab="", ylab="", main="", bty='n') + p = par(mar=c(5, 10, 4, 4) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(order.rows(data))), col=color.1, xaxt='n', yaxt='n', main="DNaseI data", ylab="", xlab="Position (bp)", + cex.main=2.5, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.07, y=1.1, labels='A', cex=4.5, xpd=NA, font=2) + # realigned data heatmap + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(results$clusters, lwd=2, colors=l) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(data.aligned)), col=color.1, xaxt='n', yaxt='n', main="Aligned DNaseI", ylab="", xlab="Approx. pos. 
(bp)", + cex.main=2.5, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.07, y=1.1, labels='B', cex=4.5, xpd=NA, font=2) + # realigned MNase + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(results$clusters, lwd=2, colors=l) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(mnase.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned MNase", ylab="", xlab="Approx. pos. (bp)", + cex.main=2.5, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.07, y=1.1, labels='C', cex=4.5, xpd=NA, font=2) + # realigned motifs + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot.label.bar(results$clusters, lwd=2, colors=l) + p = par(mar=c(5, 0, 4, 1) + 0.1, mai=c(0.6732, 0, 0.5412, 1)) + image(t(condense.matrix(motif.aligned)), col=color.2, xaxt='n', yaxt='n', main="Aligned motifs", ylab="", xlab="Approx. pos. (bp)", + cex.main=2.5, cex.lab=2.5) + axis(side=1, at=x.at, labels=x.lab, cex.axis=1.7) + text(x=-0.07, y=1.1, labels='D', cex=4.5, xpd=NA, font=2) + + # proportion of regions with a TSS in each cluster + prop_tss = vector(mode="numeric", length=n.cluster) + for(j in 1:n.cluster) + { index = which(results$clusters == j) + prop_tss[j] = sum(has_tss[index]) / length(index) + } + p = par(mar=c(5.1,5.1,4.1,1.1)) + barplot(prop_tss, col=color.lab, + main="Prop. TSS", xlab="clusters", ylab="Prop. region with TSS", + names.arg=1:n.cluster, + cex.main=2.5, cex.lab=2, cex.axis=2) + text(x=-0.2, 0.65, labels='E', cex=4.5, xpd=NA, font=2) + + # cluster 2 aggregations (each profile is scaled by its own maximum) + x = chipcor.dnase[,1] + y.dnase = chipcor.dnase[,2] + y.mnase = chipcor.mnase[,2] + y.tss.m = chipcor.tss.m[,2] + y.cage.m = chipcor.cage.m[,2] + p = par(mar=c(5.1,6.1,4.1,2.1)) + plot(x=x, y=y.mnase/max(y.mnase), lwd=3, col=color.lab[2], type='l', + xlab="Approx. pos. (bp)", + ylab="Prop. 
of max signal", + main="Cluster 2", + ylim=c(0,1.2), + cex.main=2.5, cex.axis=1.7, cex.lab=2.5) + lines(x=x, y=y.dnase/max(y.dnase), lwd=3, col=color.lab[1], lty=1) # dnase on both std / at oriented peaks + lines(x=x, y=y.tss.m/max(y.tss.m), lwd=2, col=color.lab[3], lty=1) # tss on - std / at oriented peaks + lines(x=x, y=y.cage.m/max(y.cage.m), lwd=2, col=color.lab[4], lty=1) # cage on - std / at oriented peaks + legend("topright", legend=c("MNase", + "DNaseI", + "TSS -std", + "CAGE -std"), + seg.len=0.5, col=c(color.lab[c(2,1,3,4)]), lwd=c(3,3,2,2), bty="n", cex=1) + text(x=-300, 1.5, labels='F', cex=4.5, xpd=NA, font=2) + + # write the motifs found by MEME as text on an empty plot + p = par(mar=c(5, 0, 4, 1) + 0.1) + plot(0,0, bty="n", xaxt="n", yaxt="n", main="De novo discovered motifs", xlab="", ylab="", cex.main=2.5, + xlim=c(1,100), ylim=c(1,100), col="white") + # cluster 1 + text(x=0 , y=100, labels="Cluster 1", cex=2, pos=4, font=2, xpd=NA, col=color.lab[1]) + text(x=0 , y=88, labels="*NFYA / NFYB", cex=2, pos=4) + text(x=0 , y=76, labels="*SP related", cex=2, pos=4) + # cluster 2 left + text(x=35 , y=100, labels="Cluster 2 left", cex=2, pos=4, font=2, xpd=NA, col=color.lab[2]) + text(x=35 , y=88, labels="*SP related", cex=2, pos=4) + text(x=35 , y=76, labels="*NFYA / NFYB", cex=2, pos=4) + text(x=35 , y=64, labels=" GATA6 / GATA3", cex=2, pos=4) + text(x=35 , y=52, labels=" SFPI1 / SPIC", cex=2, pos=4) + text(x=35 , y=40, labels=" FOX related", cex=2, pos=4) + text(x=35 , y=28, labels=" ARNTL / BHLHE41", cex=2, pos=4) + # cluster 2 right + text(x=35 , y=16, labels="Cluster 2 right", cex=2, pos=4, font=2, xpd=NA, col=color.lab[2]) + text(x=35 , y=04, labels="*SP related", cex=2, pos=4, xpd=NA) + text(x=35 , y=-8, labels="*NFYA / NFYB", cex=2, pos=4, xpd=NA) + text(x=35 , y=-20, labels=" ARNTL / BHLHE41", cex=2, pos=4, xpd=NA) + # cluster 3 + text(x=75 , y=100, labels="Cluster 3", cex=2, pos=4, font=2, xpd=NA, col=color.lab[3]) + text(x=75 , y=88, labels="*SP related (c)", 
cex=2, pos=4) + 
text(x=75 , y=76, labels="*NFYA / NFYB", cex=2, pos=4) + text(x=75 , y=64, labels="*GATA related", cex=2, pos=4) + text(x=75 , y=52, labels=" ETS related", cex=2, pos=4) + text(x=75 , y=40, labels=" ATF1", cex=2, pos=4) + text(x=75 , y=28, labels="*AP1 related", cex=2, pos=4) + text(x=75 , y=16, labels="*NRF1 (c)", cex=2, pos=4) + text(x=02, 125, labels='G', cex=4.5, xpd=NA, font=2) + + par(p) # NOTE: p was reassigned at every par() call above, so only the last saved state is restored + +dev.off() + +rm(list=ls()) + + diff --git a/scripts/ch_spark/get_figures.sh b/scripts/ch_spark/get_figures.sh index ada5425..b498aec 100644 --- a/scripts/ch_spark/get_figures.sh +++ b/scripts/ch_spark/get_figures.sh @@ -1,15 +1,17 @@ dest_dir='/local/groux/phd_thesis/images/ch_spark' script_dir='/local/groux/phd_thesis/scripts/ch_spark' targ_dir='/local/groux/Kmeans_chipseq/results/article' mkdir -p $dest_dir -cp $targ_dir/figure1.pdf $dest_dir -cp $targ_dir/supplemental_figure1.pdf $dest_dir -cp $targ_dir/supplemental_figure2.pdf $dest_dir -cp $targ_dir/supplemental_figure4.pdf $dest_dir -cp $targ_dir/supplemental_figure5.pdf $dest_dir -cp $targ_dir/supplemental_figure8.pdf $dest_dir +# cp $targ_dir/figure1.pdf $dest_dir +# cp $targ_dir/supplemental_figure1.pdf $dest_dir +# cp $targ_dir/supplemental_figure2.pdf $dest_dir +# cp $targ_dir/supplemental_figure4.pdf $dest_dir +# cp $targ_dir/supplemental_figure5.pdf $dest_dir +# cp $targ_dir/supplemental_figure8.pdf $dest_dir +# redo the article figures in png format, with lower resolution +Rscript $script_dir/figures.R