diff --git a/images/ch_introduction/nucleosome_positioning.png b/images/ch_introduction/nucleosome_positioning.png new file mode 100644 index 0000000..e6d787a Binary files /dev/null and b/images/ch_introduction/nucleosome_positioning.png differ diff --git a/images/ch_introduction/nucleosome_positioning.svg b/images/ch_introduction/nucleosome_positioning.svg new file mode 100644 index 0000000..0c8192b --- /dev/null +++ b/images/ch_introduction/nucleosome_positioning.svg @@ -0,0 +1,8833 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + A + B + phased + fuzzy + fuzzy + + + diff --git a/main/ch_atac-seq.aux b/main/ch_atac-seq.aux index 9718809..5972b32 100644 --- a/main/ch_atac-seq.aux +++ b/main/ch_atac-seq.aux @@ -1,173 +1,173 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{vierstra_genomic_2016} \citation{neph_expansive_2012} \citation{adey_rapid_2010,buenrostro_transposition_2013} \citation{barski_high-resolution_2007} \citation{vierstra_genomic_2016} \citation{vierstra_genomic_2016} \citation{adey_rapid_2010,buenrostro_transposition_2013} \citation{adey_rapid_2010} -\@writefile{toc}{\contentsline {chapter}{\numberline {5}Chromatin accessibility of monocytes}{61}{chapter.5}} +\@writefile{toc}{\contentsline {chapter}{\numberline {5}Chromatin accessibility of monocytes}{63}{chapter.5}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{loa}{\addvspace {10\p@ }} -\newlabel{atac_seq}{{5}{61}{Chromatin accessibility of monocytes}{chapter.5}{}} -\@writefile{chapter}{\contentsline {toc}{Chromatin accessibility of monocytes}{61}{chapter.5}} -\@writefile{toc}{\contentsline {section}{\numberline {5.1}ATAC-seq}{61}{section.5.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {5.1}{\ignorespaces \textbf {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. 
A subsequent step of ligation allows to add sequencing adaptors. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legent taken and adapted from \citep {vierstra_genomic_2016}.\relax }}{62}{figure.caption.31}} -\newlabel{atac_seq_atac_seq}{{5.1}{62}{\textbf {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows to add sequencing adaptors. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legent taken and adapted from \citep {vierstra_genomic_2016}.\relax }{figure.caption.31}{}} +\newlabel{atac_seq}{{5}{63}{Chromatin accessibility of monocytes}{chapter.5}{}} +\@writefile{chapter}{\contentsline {toc}{Chromatin accessibility of monocytes}{63}{chapter.5}} +\@writefile{toc}{\contentsline {section}{\numberline {5.1}ATAC-seq}{63}{section.5.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.1}{\ignorespaces \textbf {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows to add sequencing adaptors. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legent taken and adapted from \citep {vierstra_genomic_2016}.\relax }}{64}{figure.caption.32}} +\newlabel{atac_seq_atac_seq}{{5.1}{64}{\textbf {ATAC-seq principle :} ATAC-seq uses a hyperactive Tn5 transposase to simultaneously cleave genomic DNA at accessible loci and ligate adaptors. 
These adaptors can serve as sequencing barcodes. A subsequent step of ligation allows to add sequencing adaptors. The purified DNA fragments are then subjected to massively parallel sequencing to generate a digital readout of per-nucleotide insertion (transposition event) genome-wide. Figure and legent taken and adapted from \citep {vierstra_genomic_2016}.\relax }{figure.caption.32}{}} \citation{adey_rapid_2010} \citation{neph_expansive_2012} \citation{berest_quantification_2018} \citation{grossman_positional_2018} -\@writefile{toc}{\contentsline {section}{\numberline {5.2}Monitoring TF binding}{63}{section.5.2}} +\@writefile{toc}{\contentsline {section}{\numberline {5.2}Monitoring TF binding}{65}{section.5.2}} \citation{angerer_single_2017} -\@writefile{toc}{\contentsline {section}{\numberline {5.3}The advent of single cell DGF}{64}{section.5.3}} -\@writefile{toc}{\contentsline {section}{\numberline {5.4}Open issues}{64}{section.5.4}} -\@writefile{toc}{\contentsline {section}{\numberline {5.5}Data}{64}{section.5.5}} +\@writefile{toc}{\contentsline {section}{\numberline {5.3}The advent of single cell DGF}{66}{section.5.3}} +\@writefile{toc}{\contentsline {section}{\numberline {5.4}Open issues}{66}{section.5.4}} +\@writefile{toc}{\contentsline {section}{\numberline {5.5}Data}{66}{section.5.5}} \citation{hon_chromasig:_2008} \citation{nielsen_catchprofiles:_2012} \citation{kundaje_ubiquitous_2012} \citation{nair_probabilistic_2014} \citation{groux_spar-k:_2019} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {section}{\numberline {5.6}Identifying over-represented signals}{65}{section.5.6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{65}{subsection.5.6.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.2}EMSequence : an algorithm to identify over-represented 
sequences}{65}{subsection.5.6.2}} -\@writefile{lof}{\contentsline {figure}{\numberline {5.2}{\ignorespaces \textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure. EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }}{66}{figure.caption.32}} -\newlabel{atac_seq_em}{{5.2}{66}{\textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. 
Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.\\ EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }{figure.caption.32}{}} +\@writefile{toc}{\contentsline {section}{\numberline {5.6}Identifying over-represented signals}{67}{section.5.6}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{67}{subsection.5.6.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.2}EMSequence : an algorithm to identify over-represented sequences}{67}{subsection.5.6.2}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.2}{\ignorespaces \textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure. EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }}{68}{figure.caption.33}} +\newlabel{atac_seq_em}{{5.2}{68}{\textbf {Illustration of the expectation-maximization algorithms} \textbf {A} illustration of ChIPPartitioning, an algorithm dedicated to the discovery of over-represented chromatin patterns, as described in \citep {nair_probabilistic_2014}. \textbf {B} illustration of EMSequence, an algorithm to discover over-represented DNA motifs. The overall design is the same. 
Both algorithms model the data has having being sampled from a distribution and perform a maximum-likelihood estimation of the distribution parameters from the data through an iterative procedure.\\ EMJoint algorithm is the combination of both ChIPPartitioning and EMSequence at the same time.\relax }{figure.caption.33}{}} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {subsubsection}{without shift and flip}{67}{figure.caption.32}} -\newlabel{atac_seq_emseq_likelihood}{{5.1}{67}{without shift and flip}{equation.5.6.1}{}} -\newlabel{atac_seq_emseq_update_model}{{5.2}{67}{without shift and flip}{equation.5.6.2}{}} -\@writefile{toc}{\contentsline {subsubsection}{with shift and flip}{67}{equation.5.6.2}} +\@writefile{toc}{\contentsline {subsubsection}{without shift and flip}{69}{figure.caption.33}} +\newlabel{atac_seq_emseq_likelihood}{{5.1}{69}{without shift and flip}{equation.5.6.1}{}} +\newlabel{atac_seq_emseq_update_model}{{5.2}{69}{without shift and flip}{equation.5.6.2}{}} +\@writefile{toc}{\contentsline {subsubsection}{with shift and flip}{69}{equation.5.6.2}} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} -\newlabel{atac_seq_emseq_likelihood_shift_flip}{{5.3}{68}{with shift and flip}{equation.5.6.3}{}} -\newlabel{atac_seq_emseq_reverse_motif}{{5.4}{68}{with shift and flip}{equation.5.6.4}{}} -\newlabel{atac_seq_emseq_update_model_shift_flip}{{5.5}{68}{with shift and flip}{equation.5.6.5}{}} +\newlabel{atac_seq_emseq_likelihood_shift_flip}{{5.3}{70}{with shift and flip}{equation.5.6.3}{}} +\newlabel{atac_seq_emseq_reverse_motif}{{5.4}{70}{with shift and flip}{equation.5.6.4}{}} +\newlabel{atac_seq_emseq_update_model_shift_flip}{{5.5}{70}{with shift and flip}{equation.5.6.5}{}} \citation{nair_probabilistic_2014} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.3}EMJoint : an algorithm to identify 
over-represented sequences and chromatin architectures}{69}{subsection.5.6.3}} -\newlabel{atac_seq_emjoint_likelihood}{{5.6}{69}{EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{equation.5.6.6}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{71}{subsection.5.6.3}} +\newlabel{atac_seq_emjoint_likelihood}{{5.6}{71}{EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{equation.5.6.6}{}} \citation{voss_dynamic_2014} \citation{cirillo_opening_2002,zaret_pioneer_2011,soufi_pioneer_2015} \citation{buenrostro_transposition_2013} \citation{buenrostro_transposition_2013} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.4}Data realignment}{70}{subsection.5.6.4}} -\@writefile{toc}{\contentsline {section}{\numberline {5.7}Results}{70}{section.5.7}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.7.1}Fragment size analysis}{70}{subsection.5.7.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {5.3}{\ignorespaces \textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. 
With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }}{71}{figure.caption.33}} -\newlabel{atac_seq_fragment_size}{{5.3}{71}{\textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. 
This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }{figure.caption.33}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {5.4}{\ignorespaces \textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively. The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }}{72}{figure.caption.34}} -\newlabel{atac_seq_ctcf_all_data}{{5.4}{72}{\textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. 
The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }{figure.caption.34}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.6.4}Data realignment}{72}{subsection.5.6.4}} +\@writefile{toc}{\contentsline {section}{\numberline {5.7}Results}{72}{section.5.7}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.7.1}Fragment size analysis}{72}{subsection.5.7.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.3}{\ignorespaces \textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. 
This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }}{73}{figure.caption.34}} +\newlabel{atac_seq_fragment_size}{{5.3}{73}{\textbf {Fragment size analysis} \textbf {A} sequenced fragment size density. The three peaks, from left to right, indicate i) the open chromatin fragments, ii) the mono-nucleosome fragments and iii) the di-nucleosome fragments. A mixture model composed of three Gaussian distributions was fitted to the data in order to model the fragment sizes. The class fit is shown as dashed lines : open chromatin (red), mono-nucleosomes (blue) and di-nucleosomes (green). The violet dashed line show the sum of the three classes. \textbf {B :} probability that a fragment belongs to any of the three fragment classes, given its size i) open chromatin (red), ii) mono-nucleosomes (blue) and iii) di-nucleosomes (green). The vertical dashed lines indicates, for each class, the size limit at which the class probability drops below 0.9. With these limites, the class spans are i) 30-84bp for open chromatin (red), ii) 133-266bp for mono-nucleosomes (blue) and iii) 341-500bp for di-nucleosomes (green). The upper limit of the di-nucleosome class was arbitrarily set to 500bp. \textbf {C :} final fragment classes. Each fragments which size overlapped the size range spanned by a class, was assigned to that class. 
This ensured a high confidence assignment for more than 134 million fragments, leaving 46 millions of ambiguous and long fragments (>500bp) unassigned.\relax }{figure.caption.34}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.4}{\ignorespaces \textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively. The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }}{74}{figure.caption.35}} +\newlabel{atac_seq_ctcf_all_data}{{5.4}{74}{\textbf {Signal around CTCF motifs : } the human genome was scanned with a CTCF PWM and different aggregated signal densities were measured for open chromatin (red lines), mono nucleosome (blue lines), di-nucleosomes (green lines) and for a pool of mono-nucleosome fragments with di-nucleosomes fragments cut in two at their center position (violet line). \textbf {Top row :} each position of the fragments, from the start of the first read to the end of the second, were used. \textbf {Middle row :} each position of the reads were used. \textbf {Bottom row :} only one position at the read edges for open chromatin fragment and the central position of nucleosome fragment were used. 
The open chromatin read edges were modified by +4bp and -5bp for +strand and -strand reads respectively.\\ The aggregated densities were measured using bin sizes of 1 (left column), 2 (middle column) and 10bp (right column).\relax }{figure.caption.35}{}} \citation{adey_rapid_2010} \citation{buenrostro_transposition_2013,li_identification_2019} -\@writefile{lof}{\contentsline {figure}{\numberline {5.5}{\ignorespaces \textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }}{73}{figure.caption.35}} -\newlabel{atac_seq_ctcf_sp1_myc_ebf1_footprint}{{5.5}{73}{\textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }{figure.caption.35}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.7.2}Measuring open chromatin and nucleosome occupancy}{73}{subsection.5.7.2}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.5}{\ignorespaces \textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. 
For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }}{75}{figure.caption.36}} +\newlabel{atac_seq_ctcf_sp1_myc_ebf1_footprint}{{5.5}{75}{\textbf {Signal around CTCF, SP1, myc and EBF1 motifs :} the human genome was scanned with one PWM per TF to predict their binding sites. For each TF, the open chromatin accessibility was measured (red) as well as and the nucleosome occupancy (blue) around their predicted binding sites. For the chromatin accessibility, the corrected read edges were considered and for nucleosomes, the center of the fragments. The motif location is indicated by the dashed lines.\relax }{figure.caption.36}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.7.2}Measuring open chromatin and nucleosome occupancy}{75}{subsection.5.7.2}} \citation{neph_expansive_2012} \citation{fu_insulator_2008} \citation{neph_expansive_2012} \citation{kundaje_ubiquitous_2012} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.7.3}Evaluation of EMSequence and ChIPPartitioning}{75}{subsection.5.7.3}} -\@writefile{toc}{\contentsline {subsubsection}{EMSequence}{75}{subsection.5.7.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.7.3}Evaluation of EMSequence and ChIPPartitioning}{77}{subsection.5.7.3}} +\@writefile{toc}{\contentsline {subsubsection}{EMSequence}{77}{subsection.5.7.3}} \citation{kent_blatblast-like_2002} \citation{chatr-aryamontri_biogrid_2017} \citation{castro-mondragon_rsat_2017} -\@writefile{lof}{\contentsline {figure}{\numberline {5.6}{\ignorespaces \textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. 
The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }}{76}{figure.caption.36}} -\newlabel{atac_seq_emseq_auc_roc}{{5.6}{76}{\textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }{figure.caption.36}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {5.7}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }}{77}{figure.caption.37}} -\newlabel{atac_seq_emseq_sp1_10class}{{5.7}{77}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. 
Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }{figure.caption.37}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.6}{\ignorespaces \textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }}{78}{figure.caption.37}} +\newlabel{atac_seq_emseq_auc_roc}{{5.6}{78}{\textbf {Classification performances on simulated data :} \textbf {Left} 50 different data partitions were run using EMSequence. The discovered models were then used to assign a class label to each sequence. These assigned labels were then compared to the true labels using the AUC under the ROC curve. The red line indicates the AUC value achieved by the true motifs. \textbf {Right} the 50 ROC curves corresponding to each partition. The red lines indicates the true motifs ROC curve. The curves under the diagonal are the cases where the 1st discovered class corresponded to the 2nd true class and vice-versa. For these cases, the AUC is actually the area over the curve.\relax }{figure.caption.37}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.7}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. 
Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }}{79}{figure.caption.38}} +\newlabel{atac_seq_emseq_sp1_10class}{{5.7}{79}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates tandem arrangements of SP1 motifs.\relax }{figure.caption.38}{}} \citation{nair_probabilistic_2014} -\@writefile{lof}{\contentsline {figure}{\numberline {5.8}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{78}{figure.caption.38}} -\newlabel{atac_seq_emread_ctcf_noshift_flip}{{5.8}{78}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. 
For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.38}{}} -\@writefile{toc}{\contentsline {subsubsection}{ChIPPartitioning}{78}{figure.caption.37}} -\@writefile{lof}{\contentsline {figure}{\numberline {5.9}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{79}{figure.caption.39}} -\newlabel{atac_seq_emread_ctcf_shift_flip}{{5.9}{79}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.39}{}} -\@writefile{toc}{\contentsline {section}{\numberline {5.8}Aligning the binding sites}{80}{section.5.8}} -\@writefile{lof}{\contentsline {figure}{\numberline {5.10}{\ignorespaces \textbf {Central parts of the extended sequence and chromatin models} found in 10'000 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. 
The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{81}{figure.caption.40}} -\newlabel{atac_seq_23class}{{5.10}{81}{\textbf {Central parts of the extended sequence and chromatin models} found in 10'000 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.40}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.8}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{80}{figure.caption.39}} +\newlabel{atac_seq_emread_ctcf_noshift_flip}{{5.8}{80}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning without shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. 
For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.39}{}} +\@writefile{toc}{\contentsline {subsubsection}{ChIPPartitioning}{80}{figure.caption.38}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.9}{\ignorespaces \textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{81}{figure.caption.40}} +\newlabel{atac_seq_emread_ctcf_shift_flip}{{5.9}{81}{\textbf {Open chromatin classes around CTCF motifs} found by ChIPPartitioning with shifing but with flipping to identify different classes of footprints around 26'650 CTCF motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.40}{}} +\@writefile{toc}{\contentsline {section}{\numberline {5.8}Aligning the binding sites}{82}{section.5.8}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.10}{\ignorespaces \textbf {Central parts of the extended sequence and chromatin models} found in 10'000 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. 
The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{83}{figure.caption.41}} +\newlabel{atac_seq_23class}{{5.10}{83}{\textbf {Central parts of the extended sequence and chromatin models} found in 10'000 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.41}{}} \citation{kurotaki_transcriptional_2017,rico_comparative_2017} \citation{castro-mondragon_rsat_2017} -\@writefile{lof}{\contentsline {figure}{\numberline {5.11}{\ignorespaces \textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{83}{figure.caption.41}} -\newlabel{atac_seq_ctcf_subclass}{{5.11}{83}{\textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. 
A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.41}{}} -\@writefile{toc}{\contentsline {section}{\numberline {5.9}Exploring individual TF classes}{83}{section.5.9}} +\@writefile{lof}{\contentsline {figure}{\numberline {5.11}{\ignorespaces \textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{85}{figure.caption.42}} +\newlabel{atac_seq_ctcf_subclass}{{5.11}{85}{\textbf {CTCF sub-classes} obtained by extracting CTCF class data and subjecting them to a ChIPPartitioning classification into 8 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. 
A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.42}{}} +\@writefile{toc}{\contentsline {section}{\numberline {5.9}Exploring individual TF classes}{85}{section.5.9}} \citation{marsland_machine_2015-1} \citation{fan_characterizing_2016,kiselev_sc3:_2017} \citation{aibar_scenic:_2017} \citation{gonzalez-blas_cistopic:_2019} -\@writefile{toc}{\contentsline {section}{\numberline {5.10}Discussions}{84}{section.5.10}} -\@writefile{toc}{\contentsline {section}{\numberline {5.11}Perspectives}{84}{section.5.11}} -\@writefile{toc}{\contentsline {section}{\numberline {5.12}Methods}{85}{section.5.12}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.1}Partitioning programs}{85}{subsection.5.12.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.2}Fragment classes}{85}{subsection.5.12.2}} -\newlabel{atac_seq_fragment_length_class}{{5.7}{85}{Fragment classes}{equation.5.12.7}{}} +\@writefile{toc}{\contentsline {section}{\numberline {5.10}Discussions}{86}{section.5.10}} +\@writefile{toc}{\contentsline {section}{\numberline {5.11}Perspectives}{86}{section.5.11}} +\@writefile{toc}{\contentsline {section}{\numberline {5.12}Methods}{87}{section.5.12}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.1}Partitioning programs}{87}{subsection.5.12.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.2}Fragment classes}{87}{subsection.5.12.2}} +\newlabel{atac_seq_fragment_length_class}{{5.7}{87}{Fragment classes}{equation.5.12.7}{}} \citation{castro-mondragon_rsat_2017} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.3}Simulated sequences}{86}{subsection.5.12.3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.4}Realignment using JASPAR motifs}{86}{subsection.5.12.4}} -\@writefile{lot}{\contentsline {table}{\numberline {5.1}{\ignorespaces \textbf {TF binding models} from JASPAR matrix clustering. 
Each model can be retrieved within JASPAR matrix clustering (\url {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TF which models are children of the given node. "Name" refers to the label this model is referred to in the text and figures.\relax }}{87}{table.caption.42}} -\newlabel{atac_seq_motif_table}{{5.1}{87}{\textbf {TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TF which models are children of the given node. "Name" refers to the label this model is referred to in the text and figures.\relax }{table.caption.42}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.3}Simulated sequences}{88}{subsection.5.12.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.4}Realignment using JASPAR motifs}{88}{subsection.5.12.4}} +\@writefile{lot}{\contentsline {table}{\numberline {5.1}{\ignorespaces \textbf {TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TF which models are children of the given node. "Name" refers to the label this model is referred to in the text and figures.\relax }}{89}{table.caption.43}} +\newlabel{atac_seq_motif_table}{{5.1}{89}{\textbf {TF binding models} from JASPAR matrix clustering. Each model can be retrieved within JASPAR matrix clustering (\url {http://jaspar2018.genereg.net/matrix-clusters/vertebrates/?detail=true}) using the cluster and node ID. "TFs covered" refers to all TF which models are children of the given node. 
"Name" refers to the label this model is referred to in the text and figures.\relax }{table.caption.43}{}} \citation{nair_probabilistic_2014} \citation{dalton_clustering_2009} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.5}Model extension}{88}{subsection.5.12.5}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.6}Extracting data assigned to a class}{88}{subsection.5.12.6}} -\newlabel{encode_peaks_algo_ndr_extend}{{3}{90}{Extracting data assigned to a class}{algocfline.3}{}} -\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Computes a matrix containing the data assigned to a given class $S$.\relax }}{90}{algocf.3}} -\newlabel{atac_seq_algo_extract_class}{{3}{90}{Extracting data assigned to a class}{algocf.3}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.7}Peak processing}{91}{subsection.5.12.7}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.8}Per TF classes}{91}{subsection.5.12.8}} -\newlabel{atac_seq_method_per_tf_class}{{5.12.8}{91}{Per TF classes}{subsection.5.12.8}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.9}Per TF sub-classes}{91}{subsection.5.12.9}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.5}Model extension}{90}{subsection.5.12.5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.6}Extracting data assigned to a class}{90}{subsection.5.12.6}} +\newlabel{encode_peaks_algo_ndr_extend}{{3}{92}{Extracting data assigned to a class}{algocfline.3}{}} +\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Computes a matrix containing the data assigned to a given class $S$.\relax }}{92}{algocf.3}} +\newlabel{atac_seq_algo_extract_class}{{3}{92}{Extracting data assigned to a class}{algocf.3}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.7}Peak processing}{93}{subsection.5.12.7}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.8}Per TF 
classes}{93}{subsection.5.12.8}} +\newlabel{atac_seq_method_per_tf_class}{{5.12.8}{93}{Per TF classes}{subsection.5.12.8}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.12.9}Per TF sub-classes}{93}{subsection.5.12.9}} \@setckpt{main/ch_atac-seq}{ -\setcounter{page}{93} +\setcounter{page}{95} \setcounter{equation}{7} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{5} \setcounter{section}{12} \setcounter{subsection}{9} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{11} \setcounter{table}{1} \setcounter{NAT@ctr}{0} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{39} \setcounter{algocfline}{3} \setcounter{algocfproc}{3} \setcounter{algocf}{3} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/main/ch_encode_peaks.aux b/main/ch_encode_peaks.aux index 4a3b2ab..a94dd0b 100644 --- a/main/ch_encode_peaks.aux +++ b/main/ch_encode_peaks.aux @@ -1,153 +1,153 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{cheng_understanding_2012} \citation{cheng_understanding_2012} \citation{mathelier_jaspar_2014} \citation{kulakovskiy_hocomoco:_2016} \citation{jolma_dna-binding_2013} \citation{cheng_understanding_2012} \citation{mathelier_jaspar_2014} \citation{kulakovskiy_hocomoco:_2016} \citation{jolma_dna-binding_2013} 
\citation{cheng_understanding_2012} \citation{gerstein_architecture_2012} \citation{wu_biogps:_2016} \citation{ghirlando_ctcf:_2016} -\@writefile{toc}{\contentsline {chapter}{\numberline {3}ENCODE peaks analysis}{27}{chapter.3}} +\@writefile{toc}{\contentsline {chapter}{\numberline {3}ENCODE peaks analysis}{29}{chapter.3}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{loa}{\addvspace {10\p@ }} -\newlabel{encode_peaks}{{3}{27}{ENCODE peaks analysis}{chapter.3}{}} -\@writefile{toc}{\contentsline {chapter}{ENCODE peaks analysis}{27}{chapter.3}} -\@writefile{toc}{\contentsline {section}{\numberline {3.1}Data}{27}{section.3.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces \textbf {Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. The horizontal dashed lines indicate 20'000 and 40'000.\relax }}{28}{figure.caption.19}} -\newlabel{encode_peaks_gm12878_peak_number}{{3.1}{28}{\textbf {Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. The horizontal dashed lines indicate 20'000 and 40'000.\relax }{figure.caption.19}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces \textbf {Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. 
Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. The horizontal dashed line indicates 0.5.\relax }}{28}{figure.caption.20}} -\newlabel{encode_peaks_gm12878_motif_prop}{{3.2}{28}{\textbf {Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. 
The horizontal dashed line indicates 0.5.\relax }{figure.caption.20}{}} +\newlabel{encode_peaks}{{3}{29}{ENCODE peaks analysis}{chapter.3}{}} +\@writefile{toc}{\contentsline {chapter}{ENCODE peaks analysis}{29}{chapter.3}} +\@writefile{toc}{\contentsline {section}{\numberline {3.1}Data}{29}{section.3.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces \textbf {Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. The horizontal dashed lines indicate 20'000 and 40'000.\relax }}{30}{figure.caption.20}} +\newlabel{encode_peaks_gm12878_peak_number}{{3.1}{30}{\textbf {Number of peaks in GM12878} called by ENCODE for each TF ChIP-seq experiment. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. The horizontal dashed lines indicate 20'000 and 40'000.\relax }{figure.caption.20}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces \textbf {Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. 
If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. The horizontal dashed line indicates 0.5.\relax }}{30}{figure.caption.21}} +\newlabel{encode_peaks_gm12878_motif_prop}{{3.2}{30}{\textbf {Proportion of peaks with a motif in GM12878}, for each TF ChIP-seq experiment, in green. Assuming that a TF binds to DNA through its motif, the motif should be nearby the peak center. Thus the center of each peak was scanned using a PWM describing the TF binding specificity. Each TF was associated to a log-odd PWM contained either in JASPAR Core vertebrate 2014 \citep {mathelier_jaspar_2014}, HOCOMOCO v10 \citep {kulakovskiy_hocomoco:_2016} or Jolma \citep {jolma_dna-binding_2013} collection. If a motif instance with a score corresponding to a pvalue higher or equal to $1\cdot 10^{-4}$ could be found, the peak was considered bearing a motif. The different TFs are colored by type, as defined by \citep {cheng_understanding_2012} : sequence specific TF (TFSS), non specific TF (TFNS), chromatin structure (ChromStr), chromatin modifier (ChromModif), RNAPII associated factors (Pol2), RNAPIII associated factors (Pol3) and others. 
The horizontal dashed line indicates 0.5.\relax }{figure.caption.21}{}} \citation{hon_chromasig:_2008,nielsen_catchprofiles:_2012,kundaje_ubiquitous_2012,nair_probabilistic_2014,groux_spar-k:_2019} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {section}{\numberline {3.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{29}{section.3.2}} -\newlabel{encode_peaks_chippartitioning}{{3.2}{29}{ChIPPartitioning : an algorithm to identify chromatin architectures}{section.3.2}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.2.1}Data realignment}{30}{subsection.3.2.1}} -\newlabel{encode_peaks_data_realign}{{3.2.1}{30}{Data realignment}{subsection.3.2.1}{}} +\@writefile{toc}{\contentsline {section}{\numberline {3.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{31}{section.3.2}} +\newlabel{encode_peaks_chippartitioning}{{3.2}{31}{ChIPPartitioning : an algorithm to identify chromatin architectures}{section.3.2}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.2.1}Data realignment}{32}{subsection.3.2.1}} +\newlabel{encode_peaks_data_realign}{{3.2.1}{32}{Data realignment}{subsection.3.2.1}{}} \citation{zhang_canonical_2014} -\@writefile{toc}{\contentsline {section}{\numberline {3.3}Nucleosome organization around transcription factor binding sites}{31}{section.3.3}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.3}{\ignorespaces \textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. 
To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represent the proportion of the highest signal for each chromatin pattern.\relax }}{32}{figure.caption.21}} -\newlabel{encode_peaks_array_measure}{{3.3}{32}{\textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). 
\textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The y-axis scale represent the proportion of the highest signal for each chromatin pattern.\relax }{figure.caption.21}{}} +\@writefile{toc}{\contentsline {section}{\numberline {3.3}Nucleosome organization around transcription factor binding sites}{33}{section.3.3}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.3}{\ignorespaces \textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). 
The y-axis scale represent the proportion of the highest signal for each chromatin pattern.\relax }}{34}{figure.caption.22}} +\newlabel{encode_peaks_array_measure}{{3.3}{34}{\textbf {Chromatin pattern around TF binding sites in GM12878 :} \textbf {A} For each peaklist, nucleosome occupancy was measured +/- 1kb around each individual TFBS using 10bp bins. The TFBS were then classified into 4 classes according to their nucleosome patterns using a ChIPPartitioning, allowing the patterns to be flipped and shifted. Each TF binding site was assigned a probability to belong to each of the 4 classes with a given values of shift and flip. To assess the extent of a given TF to i) display nucleosomes arrays on its flank and ii) to have nucleosome positioned with respect to its binding sites, array density and shift probability standard deviation have been measured for each class. Classes having a mean array density above 0.4 and a shift probability standard deviation under 3.5 and other custom classes are highlighted. Classes are named using the TF, the laboratory which produced the data and the class number (from 1 to 4). \textbf {B} Examples of class patterns corresponding to some of the highlighted classes for CTCF, ATF3, YY1, EBF1 and ZNF143. MNase profiles (red) were allowed to be shifted and flipped and DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). 
The y-axis scale represent the proportion of the highest signal for each chromatin pattern.\relax }{figure.caption.22}{}} \citation{kundaje_ubiquitous_2012,fu_insulator_2008} -\@writefile{toc}{\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{33}{section.3.4}} -\newlabel{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}{{3.4}{33}{The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{section.3.4}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.4}{\ignorespaces \textbf { Colocalization with CTCF peaks in GM12878 cells : } \textbf {A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). \textbf {B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. \textbf {C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. 
Peaklists are named using the TF together with the laboratory which produced the data.\relax }}{34}{figure.caption.22}} -\newlabel{encode_peaks_colocalization_ctcf}{{3.4}{34}{\textbf { Colocalization with CTCF peaks in GM12878 cells : } \textbf {A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). \textbf {B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. \textbf {C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. Peaklists are named using the TF together with the laboratory which produced the data.\relax }{figure.caption.22}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.5}{\ignorespaces \textbf {Nucleosome free region at CTCF binding sites} \textbf {a} The length are represented as boxplots. 
The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }}{35}{figure.caption.23}} -\newlabel{encode_peaks_ctcf_ndr}{{3.5}{35}{\textbf {Nucleosome free region at CTCF binding sites} \textbf {a} The length are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }{figure.caption.23}{}} +\@writefile{toc}{\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{35}{section.3.4}} +\newlabel{encode_peaks_section_ctcf_rad21_smc3_yy1_znf143}{{3.4}{35}{The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{section.3.4}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.4}{\ignorespaces \textbf { Colocalization with CTCF peaks in GM12878 cells : } \textbf {A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). 
\textbf {B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. \textbf {C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. Peaklists are named using the TF together with the laboratory which produced the data.\relax }}{36}{figure.caption.23}} +\newlabel{encode_peaks_colocalization_ctcf}{{3.4}{36}{\textbf { Colocalization with CTCF peaks in GM12878 cells : } \textbf {A} Proportion of peaks for different TFs having a CTCF peak within 10bp, 50bp and 100bp. The colours indicate different TFs. The CTCF peaklist used as reference to assess CTCF presence was CTCF.Sydh (in red), the two RAD21 peaklists are RAD21.Haib and RAD21.Sydh respectively (in blue), the SMC3 peaklist is SMC3.Sydh (in green), the YY1 peaklist is YY1.Haib (in orange) and the ZNF143 peaklist is ZNF143.Sydh (in violet). \textbf {B} Venn diagrams showing the proportion of peaks for each TF with i) an instance of its own motif, ii) a CTCF.Sydh peak within 100bp, iii) both or iv) neither of them. RAD21 and SMC3 are not represented as there is no PWM available to describe their sequence specificity. 
\textbf {C} ChIPPartitioning classification with shift and flip of MNase patterns +/- 1kb of YY1.Haib peaks using 10bp bins. YY1 peaks with (upper row) and without (lower row) a CTCF peak within 100bp. Two classes were used to account for "typical" and "non-typical" looking MNase patterns. DNaseI (blue), TSS density (violet) and sequence conservation (green) were overlaid according to MNase classification (taking into account both shift and flip). The number at the upper right corner of each plot indicate the overall class probability. The number of YY1 peaks is slightly smaller than in B) because peaks showing no MNase reads were not included in the classification analysis. Peaklists are named using the TF together with the laboratory which produced the data.\relax }{figure.caption.23}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.5}{\ignorespaces \textbf {Nucleosome free region at CTCF binding sites} \textbf {a} The length are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. \textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }}{37}{figure.caption.24}} +\newlabel{encode_peaks_ctcf_ndr}{{3.5}{37}{\textbf {Nucleosome free region at CTCF binding sites} \textbf {a} The length are represented as boxplots. The CTCF binding sites are divided into subgroups according to additional presence of SCM3, RAD21, YY1 or ZNF143. The number of binding sites in each subgroup is indicated in red above the boxplots. The presence of SMC3 only, RAD21 only and SMC3 and RAD21 together are indicated in violet, blue and orange respectively. 
\textbf {B} The proportion of peaks (in green), in each subgroup, having a TSS within a 1kb.\relax }{figure.caption.24}{}} \citation{stedman_cohesins_2008} \citation{losada_cohesin_2014} \citation{donohoe_identification_2007} \citation{bailey_znf143_2015} \citation{ong_ctcf:_2014,ghirlando_ctcf:_2016} \citation{wang_sequence_2012,neph_expansive_2012,consortium_integrated_2012,guo_high_2012} \citation{chatr-aryamontri_biogrid_2017} \citation{wang_sequence_2012,neph_expansive_2012,consortium_integrated_2012,guo_high_2012} \citation{chatr-aryamontri_biogrid_2017} -\@writefile{lof}{\contentsline {figure}{\numberline {3.6}{\ignorespaces \textbf {Possible interaction scenarios between TFs} \textbf {A} Indirect co-binding. The TFs dimerize and bind together on DNA. \textbf {B} Indirect co-binding. Both TF dimerize but only one binds the DNA, the other (the blue) is the tethering factor. \textbf {C} Independent co-binding. Both TF bind in close vicinity but without forming a complex. Both TFs may not be necessarily bound at the same time. \textbf {D} Interference. Both motifs partially or totally overlap each other. Whether only one TF or both can bind at the same time is unknown.\relax }}{37}{figure.caption.24}} -\newlabel{encode_peaks_tf_association}{{3.6}{37}{\textbf {Possible interaction scenarios between TFs} \textbf {A} Indirect co-binding. The TFs dimerize and bind together on DNA. \textbf {B} Indirect co-binding. Both TF dimerize but only one binds the DNA, the other (the blue) is the tethering factor. \textbf {C} Independent co-binding. Both TF bind in close vicinity but without forming a complex. Both TFs may not be necessarily bound at the same time. \textbf {D} Interference. Both motifs partially or totally overlap each other. 
Whether only one TF or both can bind at the same time is unknown.\relax }{figure.caption.24}{}} -\@writefile{toc}{\contentsline {section}{\numberline {3.5}CTCF and JunD interactomes}{37}{section.3.5}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.7}{\ignorespaces \textbf {CTCF motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf {B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif. The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref {encode_peaks_methods_data}).\relax }}{38}{figure.caption.25}} -\newlabel{encode_peaks_ctcf_association}{{3.7}{38}{\textbf {CTCF motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. 
For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf {B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif. The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref {encode_peaks_methods_data}).\relax }{figure.caption.25}{}} -\@writefile{lot}{\contentsline {table}{\numberline {3.1}{\ignorespaces \textbf {Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' 
to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite {wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep {chatr-aryamontri_biogrid_2017}.\relax }}{39}{table.caption.26}} -\newlabel{encode_peaks_association_table}{{3.1}{39}{\textbf {Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite {wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep {chatr-aryamontri_biogrid_2017}.\relax }{table.caption.26}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.6}{\ignorespaces \textbf {Possible interaction scenarios between TFs} \textbf {A} Indirect co-binding. The TFs dimerize and bind together on DNA. \textbf {B} Indirect co-binding. Both TF dimerize but only one binds the DNA, the other (the blue) is the tethering factor. 
\textbf {C} Independent co-binding. Both TF bind in close vicinity but without forming a complex. Both TFs may not be necessarily bound at the same time. \textbf {D} Interference. Both motifs partially or totally overlap each other. Whether only one TF or both can bind at the same time is unknown.\relax }}{39}{figure.caption.25}} +\newlabel{encode_peaks_tf_association}{{3.6}{39}{\textbf {Possible interaction scenarios between TFs} \textbf {A} Indirect co-binding. The TFs dimerize and bind together on DNA. \textbf {B} Indirect co-binding. Both TF dimerize but only one binds the DNA, the other (the blue) is the tethering factor. \textbf {C} Independent co-binding. Both TF bind in close vicinity but without forming a complex. Both TFs may not be necessarily bound at the same time. \textbf {D} Interference. Both motifs partially or totally overlap each other. Whether only one TF or both can bind at the same time is unknown.\relax }{figure.caption.25}{}} +\@writefile{toc}{\contentsline {section}{\numberline {3.5}CTCF and JunD interactomes}{39}{section.3.5}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.7}{\ignorespaces \textbf {CTCF motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. 
\textbf {B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif. The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref {encode_peaks_methods_data}).\relax }}{40}{figure.caption.26}} +\newlabel{encode_peaks_ctcf_association}{{3.7}{40}{\textbf {CTCF motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The CTCF dataset ORs are too high to be represented in this plot. \textbf {B} Density of CTCF motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif. 
The absence of CTCF motif within the first 70bp around CTCF binding sites is explained by the peak processing (see section \ref {encode_peaks_methods_data}).\relax }{figure.caption.26}{}} +\@writefile{lot}{\contentsline {table}{\numberline {3.1}{\ignorespaces \textbf {Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite {wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep {chatr-aryamontri_biogrid_2017}.\relax }}{41}{table.caption.27}} +\newlabel{encode_peaks_association_table}{{3.1}{41}{\textbf {Identified associations : } Details of all the TF associations identified, as well as the possible molecular mechanisms explaining them. The columns 'TF${_A}$' and 'TF${_B}$' refer to the TF involved in the association, 'Motif.ass.' 
to whether both motif are associated together ('positive') or repel each other ('negative'), as measured by the Fisher test, 'Type' to the proposed interaction mechanism between both TFs, 'Binder' to the TF binding DNA in case of an indirect co-binding, the value 'both' means that both tethering complexes may exist, 'Reported' to whether this interaction has already been reported in one of the following study \cite {wang_sequence_2012, neph_expansive_2012, consortium_integrated_2012, guo_high_2012} and 'Validated' to whether this physical association is experimentally validated and reported in BioGRID v.3.4.145 \citep {chatr-aryamontri_biogrid_2017}.\relax }{table.caption.27}{}} \citation{heinz_simple_2010} \citation{wang_sequence_2012,neph_expansive_2012,consortium_integrated_2012,guo_high_2012,chatr-aryamontri_biogrid_2017} \citation{gaffney_controls_2012} \citation{gaffney_controls_2012} \citation{boller_defining_2018} \citation{hagman_early_2005} \citation{maier_early_2004,boller_pioneering_2016} -\@writefile{toc}{\contentsline {section}{\numberline {3.6}EBF1 binds nucleosomes}{42}{section.3.6}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.8}{\ignorespaces \textbf {EBF1 binding sites} stand on the edge of a nucleosome. \textbf {A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep {gaffney_controls_2012}. \textbf {B} Dinucleotide frequencies around the dyads of the nucleosomes that have an EBF1 binding site within 100bp. \textbf {C} Motif frequency around the dyads of nucleosomes that have an EBF1 binding site within 100bp. 
The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{43}{figure.caption.27}} -\newlabel{encode_peaks_ebf1}{{3.8}{43}{\textbf {EBF1 binding sites} stand on the edge of a nucleosome. \textbf {A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep {gaffney_controls_2012}. \textbf {B} Dinucleotide frequencies around the dyads of the nucleosomes that have an EBF1 binding site within 100bp. \textbf {C} Motif frequency around the dyads of nucleosomes that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.27}{}} +\@writefile{toc}{\contentsline {section}{\numberline {3.6}EBF1 binds nucleosomes}{44}{section.3.6}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.8}{\ignorespaces \textbf {EBF1 binding sites} stand on the edge of a nucleosome. \textbf {A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep {gaffney_controls_2012}. \textbf {B} Dinucleotide frequencies around the dyads of the nucleosomes that have an EBF1 binding site within 100bp. \textbf {C} Motif frequency around the dyads of nucleosomes that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{45}{figure.caption.28}} +\newlabel{encode_peaks_ebf1}{{3.8}{45}{\textbf {EBF1 binding sites} stand on the edge of a nucleosome. \textbf {A} Nucleosome dyad distributions around the EBF1 binding sites (from the Haib dataset). 
The dyad distributions have been measured from two independent datasets : i) MNase-seq data released by the ENCODE Consortium (in red) and by Gaffney et al. (in blue) \citep {gaffney_controls_2012}. \textbf {B} Dinucleotide frequencies around the dyads of the nucleosomes that have an EBF1 binding site within 100bp. \textbf {C} Motif frequency around the dyads of nucleosomes that have an EBF1 binding site within 100bp. The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.28}{}} \citation{trifonov_cracking_2011} \citation{ioshikhes_variety_2011,trifonov_cracking_2011,gaffney_controls_2012} \citation{dreos_mga_2018} \citation{gerstein_architecture_2012} \citation{mathelier_jaspar_2014} \citation{kulakovskiy_hocomoco:_2016} \citation{jolma_dna-binding_2013} -\@writefile{toc}{\contentsline {section}{\numberline {3.7}Methods}{44}{section.3.7}} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.1}Data and data processing}{44}{subsection.3.7.1}} -\newlabel{encode_peaks_methods_data}{{3.7.1}{44}{Data and data processing}{subsection.3.7.1}{}} +\@writefile{toc}{\contentsline {section}{\numberline {3.7}Methods}{46}{section.3.7}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.1}Data and data processing}{46}{subsection.3.7.1}} +\newlabel{encode_peaks_methods_data}{{3.7.1}{46}{Data and data processing}{subsection.3.7.1}{}} \citation{gaffney_controls_2012} \citation{boyle_high-resolution_2008} \citation{dreos_eukaryotic_2017} \citation{siepel_evolutionarily_2005} \citation{ambrosini_chip-seq_2016} \citation{nair_probabilistic_2014} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.2}Classification of MNase patterns}{45}{subsection.3.7.2}} -\newlabel{encode_peaks_em_mnase}{{3.7.2}{45}{Classification of MNase patterns}{subsection.3.7.2}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.2}Classification of MNase patterns}{47}{subsection.3.7.2}} 
+\newlabel{encode_peaks_em_mnase}{{3.7.2}{47}{Classification of MNase patterns}{subsection.3.7.2}{}} \citation{zhang_canonical_2014} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.3}Quantifying nucleosome array intensity from classification results}{46}{subsection.3.7.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.3}Quantifying nucleosome array intensity from classification results}{48}{subsection.3.7.3}} \citation{ambrosini_chip-seq_2016} \citation{ambrosini_chip-seq_2016} -\newlabel{encode_peaks_equation_shift_density1}{{3.1}{47}{Quantifying nucleosome array intensity from classification results}{equation.3.7.1}{}} -\newlabel{encode_peaks_equation_shift_density2}{{3.2}{47}{Quantifying nucleosome array intensity from classification results}{equation.3.7.2}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.4}Peak colocalization}{47}{subsection.3.7.4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.5}NDR detection}{47}{subsection.3.7.5}} -\newlabel{encode_peaks_algo_ndr_extend}{{1}{49}{NDR detection}{algocfline.1}{}} -\@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Searches the coordinates of the NDR using the posterior nucleosome and nucleosome free class probabilities, for a region $R_i$, from its central position.\relax }}{49}{algocf.1}} +\newlabel{encode_peaks_equation_shift_density1}{{3.1}{49}{Quantifying nucleosome array intensity from classification results}{equation.3.7.1}{}} +\newlabel{encode_peaks_equation_shift_density2}{{3.2}{49}{Quantifying nucleosome array intensity from classification results}{equation.3.7.2}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.4}Peak colocalization}{49}{subsection.3.7.4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.5}NDR detection}{49}{subsection.3.7.5}} +\newlabel{encode_peaks_algo_ndr_extend}{{1}{51}{NDR detection}{algocfline.1}{}} +\@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces 
Searches the coordinates of the NDR using the posterior nucleosome and nucleosome free class probabilities, for a region $R_i$, from its central position.\relax }}{51}{algocf.1}} \citation{mathelier_jaspar_2014} \citation{kulakovskiy_hocomoco:_2016} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_chip-seq_2016-1} \citation{ambrosini_chip-seq_2016-1} \citation{ambrosini_chip-seq_2016-1} \citation{gaffney_controls_2012} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.6}CTCF and JunD interactors}{50}{subsection.3.7.6}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.6}CTCF and JunD interactors}{52}{subsection.3.7.6}} \citation{ambrosini_signal_2003} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.7}EBF1 and nucleosome}{51}{subsection.3.7.7}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.7.7}EBF1 and nucleosome}{53}{subsection.3.7.7}} \@setckpt{main/ch_encode_peaks}{ -\setcounter{page}{53} +\setcounter{page}{55} \setcounter{equation}{6} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{3} \setcounter{section}{7} \setcounter{subsection}{7} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{8} \setcounter{table}{1} \setcounter{NAT@ctr}{0} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{28} \setcounter{algocfline}{1} \setcounter{algocfproc}{1} \setcounter{algocf}{1} 
\setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/main/ch_group_projects.aux b/main/ch_group_projects.aux index dcfa0ea..0fc06f8 100644 --- a/main/ch_group_projects.aux +++ b/main/ch_group_projects.aux @@ -1,180 +1,180 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{dreos_mga_2018} \citation{barrett_ncbi_2011} \citation{rustici_arrayexpress_2013} \citation{dreos_mga_2018} \citation{dreos_mga_2018} \citation{consortium_integrated_2012} \citation{roadmap_epigenomics_consortium_integrative_2015} \citation{lizio_gateways_2015} -\@writefile{toc}{\contentsline {chapter}{\numberline {2}Published laboratory projects}{7}{chapter.2}} +\@writefile{toc}{\contentsline {chapter}{\numberline {2}Published laboratory projects}{9}{chapter.2}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{loa}{\addvspace {10\p@ }} -\newlabel{lab_projects}{{2}{7}{Published laboratory projects}{chapter.2}{}} -\@writefile{toc}{\contentsline {chapter}{Published laboratory projects}{7}{chapter.2}} -\@writefile{toc}{\contentsline {section}{\numberline {2.1}Mass Genome Annotation repository}{7}{section.2.1}} -\newlabel{section_mga}{{2.1}{7}{Mass Genome Annotation repository}{section.2.1}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.1.1}Introduction}{7}{subsection.2.1.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.1.2}MGA content and organization}{7}{subsection.2.1.2}} -\@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces \textbf {Content of the MGA repository by 2018} \textbf {A} Proportion of samples in the database grouped by type. \textbf {B} Proportion of samples grouped by organism. Assemblies belonging to the same organism are merged together. \textbf {C} Samples numbers stratified by type and organism. Dot areas are proportional to the total number of samples in that category. 
The corresponding numbers can be found in a weakly updated table posted on the MGA home page at \url {http://ccg.vital-it.ch/mga}. Figure and legend taken and adapted from \citep {dreos_mga_2018}.\relax }}{8}{figure.caption.8}} -\newlabel{lab_projects_mga_stats}{{2.1}{8}{\textbf {Content of the MGA repository by 2018} \textbf {A} Proportion of samples in the database grouped by type. \textbf {B} Proportion of samples grouped by organism. Assemblies belonging to the same organism are merged together. \textbf {C} Samples numbers stratified by type and organism. Dot areas are proportional to the total number of samples in that category. The corresponding numbers can be found in a weakly updated table posted on the MGA home page at \url {http://ccg.vital-it.ch/mga}. Figure and legend taken and adapted from \citep {dreos_mga_2018}.\relax }{figure.caption.8}{}} +\newlabel{lab_projects}{{2}{9}{Published laboratory projects}{chapter.2}{}} +\@writefile{toc}{\contentsline {chapter}{Published laboratory projects}{9}{chapter.2}} +\@writefile{toc}{\contentsline {section}{\numberline {2.1}Mass Genome Annotation repository}{9}{section.2.1}} +\newlabel{section_mga}{{2.1}{9}{Mass Genome Annotation repository}{section.2.1}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.1.1}Introduction}{9}{subsection.2.1.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.1.2}MGA content and organization}{9}{subsection.2.1.2}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces \textbf {Content of the MGA repository by 2018} \textbf {A} Proportion of samples in the database grouped by type. \textbf {B} Proportion of samples grouped by organism. Assemblies belonging to the same organism are merged together. \textbf {C} Samples numbers stratified by type and organism. Dot areas are proportional to the total number of samples in that category. 
The corresponding numbers can be found in a weakly updated table posted on the MGA home page at \url {http://ccg.vital-it.ch/mga}. Figure and legend taken and adapted from \citep {dreos_mga_2018}.\relax }}{10}{figure.caption.9}} +\newlabel{lab_projects_mga_stats}{{2.1}{10}{\textbf {Content of the MGA repository by 2018} \textbf {A} Proportion of samples in the database grouped by type. \textbf {B} Proportion of samples grouped by organism. Assemblies belonging to the same organism are merged together. \textbf {C} Samples numbers stratified by type and organism. Dot areas are proportional to the total number of samples in that category. The corresponding numbers can be found in a weakly updated table posted on the MGA home page at \url {http://ccg.vital-it.ch/mga}. Figure and legend taken and adapted from \citep {dreos_mga_2018}.\relax }{figure.caption.9}{}} \citation{siepel_evolutionarily_2005} \citation{pollard_detection_2010} \citation{ambrosini_chip-seq_2016} \citation{ambrosini_chip-seq_2016} \citation{langmead_ultrafast_2009} \citation{langmead_fast_2012} \citation{li_sequence_2009} \citation{quinlan_bedtools:_2010} \citation{ambrosini_chip-seq_2016} \citation{ambrosini_chip-seq_2016} \citation{ambrosini_signal_2003} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.1.3}Conclusions}{9}{subsection.2.1.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.1.3}Conclusions}{11}{subsection.2.1.3}} \citation{dreos_eukaryotic_2017} \citation{dreos_epd_2013} \citation{dreos_eukaryotic_2017} \citation{dreos_epd_2013} \citation{dreos_eukaryotic_2017} \citation{bucher_compilation_1986} -\@writefile{lof}{\contentsline {figure}{\numberline {2.2}{\ignorespaces \textbf {Schematic representation of the EPDnew pipeline} \textbf {A} Download of authoritative gene catalogs and primary TSS mapping data from public databases, data repositories and consortium websites. \textbf {B} Quality control (QC) of incoming data (e.g. 
read mapping efficiency, contaminations, etc.). \textbf {C} Data passing QC are reformatted and incorporated into the MGA repository. \textbf {D} Selection of a subset of TSS mapping experiments for generating a new organism-specific TSS collection. \textbf {E} Input data for a new module of EPDnew. \textbf {F} Organism-specific automatic database assembly pipeline tailored to the input data, see \citep {dreos_epd_2013} for a detailed description of the human EPDnew assembly pipeline. \textbf {G} Preliminary or final TSS collection \textbf {H} Manual sanity checks of individual randomly selected promoter entries using the corresponding entry viewer. \textbf {I} Automatic quality evaluation of the TSS collections as a whole by motif enrichment tests, see Figure \ref {lab_projects_epd_motifs} for an example. \ref {L} Feedback is collected from quality evaluation steps H and I. This may lead to the exclusion, replacement or addition of source data sets or modifications (e.g. program parameter fine-tuning) of the computational database generation pipeline. Note that the development of a final, publicly released EPDnew module typically involves several evaluation-modification cycles. Figure and legend taken and adapted from \citep {dreos_eukaryotic_2017}.\relax }}{10}{figure.caption.9}} -\newlabel{lab_projects_epd_pipeline}{{2.2}{10}{\textbf {Schematic representation of the EPDnew pipeline} \textbf {A} Download of authoritative gene catalogs and primary TSS mapping data from public databases, data repositories and consortium websites. \textbf {B} Quality control (QC) of incoming data (e.g. read mapping efficiency, contaminations, etc.). \textbf {C} Data passing QC are reformatted and incorporated into the MGA repository. \textbf {D} Selection of a subset of TSS mapping experiments for generating a new organism-specific TSS collection. \textbf {E} Input data for a new module of EPDnew. 
\textbf {F} Organism-specific automatic database assembly pipeline tailored to the input data, see \citep {dreos_epd_2013} for a detailed description of the human EPDnew assembly pipeline. \textbf {G} Preliminary or final TSS collection \textbf {H} Manual sanity checks of individual randomly selected promoter entries using the corresponding entry viewer. \textbf {I} Automatic quality evaluation of the TSS collections as a whole by motif enrichment tests, see Figure \ref {lab_projects_epd_motifs} for an example. \ref {L} Feedback is collected from quality evaluation steps H and I. This may lead to the exclusion, replacement or addition of source data sets or modifications (e.g. program parameter fine-tuning) of the computational database generation pipeline. Note that the development of a final, publicly released EPDnew module typically involves several evaluation-modification cycles. Figure and legend taken and adapted from \citep {dreos_eukaryotic_2017}.\relax }{figure.caption.9}{}} -\@writefile{toc}{\contentsline {section}{\numberline {2.2}Eukaryotic Promoter Database}{10}{section.2.2}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.2}{\ignorespaces \textbf {Schematic representation of the EPDnew pipeline} \textbf {A} Download of authoritative gene catalogs and primary TSS mapping data from public databases, data repositories and consortium websites. \textbf {B} Quality control (QC) of incoming data (e.g. read mapping efficiency, contaminations, etc.). \textbf {C} Data passing QC are reformatted and incorporated into the MGA repository. \textbf {D} Selection of a subset of TSS mapping experiments for generating a new organism-specific TSS collection. \textbf {E} Input data for a new module of EPDnew. \textbf {F} Organism-specific automatic database assembly pipeline tailored to the input data, see \citep {dreos_epd_2013} for a detailed description of the human EPDnew assembly pipeline. 
\textbf {G} Preliminary or final TSS collection \textbf {H} Manual sanity checks of individual randomly selected promoter entries using the corresponding entry viewer. \textbf {I} Automatic quality evaluation of the TSS collections as a whole by motif enrichment tests, see Figure \ref {lab_projects_epd_motifs} for an example. \ref {L} Feedback is collected from quality evaluation steps H and I. This may lead to the exclusion, replacement or addition of source data sets or modifications (e.g. program parameter fine-tuning) of the computational database generation pipeline. Note that the development of a final, publicly released EPDnew module typically involves several evaluation-modification cycles. Figure and legend taken and adapted from \citep {dreos_eukaryotic_2017}.\relax }}{12}{figure.caption.10}} +\newlabel{lab_projects_epd_pipeline}{{2.2}{12}{\textbf {Schematic representation of the EPDnew pipeline} \textbf {A} Download of authoritative gene catalogs and primary TSS mapping data from public databases, data repositories and consortium websites. \textbf {B} Quality control (QC) of incoming data (e.g. read mapping efficiency, contaminations, etc.). \textbf {C} Data passing QC are reformatted and incorporated into the MGA repository. \textbf {D} Selection of a subset of TSS mapping experiments for generating a new organism-specific TSS collection. \textbf {E} Input data for a new module of EPDnew. \textbf {F} Organism-specific automatic database assembly pipeline tailored to the input data, see \citep {dreos_epd_2013} for a detailed description of the human EPDnew assembly pipeline. \textbf {G} Preliminary or final TSS collection \textbf {H} Manual sanity checks of individual randomly selected promoter entries using the corresponding entry viewer. \textbf {I} Automatic quality evaluation of the TSS collections as a whole by motif enrichment tests, see Figure \ref {lab_projects_epd_motifs} for an example. 
\ref {L} Feedback is collected from quality evaluation steps H and I. This may lead to the exclusion, replacement or addition of source data sets or modifications (e.g. program parameter fine-tuning) of the computational database generation pipeline. Note that the development of a final, publicly released EPDnew module typically involves several evaluation-modification cycles. Figure and legend taken and adapted from \citep {dreos_eukaryotic_2017}.\relax }{figure.caption.10}{}} +\@writefile{toc}{\contentsline {section}{\numberline {2.2}Eukaryotic Promoter Database}{12}{section.2.2}} \citation{dreos_epd_2013} \citation{dreos_eukaryotic_2015} \citation{dreos_eukaryotic_2017} \citation{ambrosini_signal_2003} \citation{ambrosini_signal_2003} \citation{dreos_eukaryotic_2017} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}Introduction}{11}{subsection.2.2.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.2}EPDnew now annotates (some of) your mushrooms and vegetables}{11}{subsection.2.2.2}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.3}Increased mapping precision in human}{11}{subsection.2.2.3}} -\@writefile{lot}{\contentsline {table}{\numberline {2.1}{\ignorespaces \textbf {Current contents of EPDnew} 'Promoters' indicate the number of TSS entries in EPDnew. 'Genes' indicates the number of genes having at least one TSS annotated in EPDnew. 'Genes' indicates the number of protein coding genes contained in the genome annotation (except for nc species). 'nc' stands for non-coding and indicates the long non-coding gene annotations. For 'nc' entries, 'genes' refers to the number of long non-coding genes present in the annotation. In parenthesis are indicated the percentages of genes having a at least one TSS annotated in EPDnew.\relax }}{12}{table.caption.10}} -\newlabel{lab_projects_epd_stats}{{2.1}{12}{\textbf {Current contents of EPDnew} 'Promoters' indicate the number of TSS entries in EPDnew. 
'Genes' indicates the number of genes having at least one TSS annotated in EPDnew. 'Genes' indicates the number of protein coding genes contained in the genome annotation (except for nc species). 'nc' stands for non-coding and indicates the long non-coding gene annotations. For 'nc' entries, 'genes' refers to the number of long non-coding genes present in the annotation. In parenthesis are indicated the percentages of genes having a at least one TSS annotated in EPDnew.\relax }{table.caption.10}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.1}Introduction}{13}{subsection.2.2.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.2}EPDnew now annotates (some of) your mushrooms and vegetables}{13}{subsection.2.2.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.3}Increased mapping precision in human}{13}{subsection.2.2.3}} +\@writefile{lot}{\contentsline {table}{\numberline {2.1}{\ignorespaces \textbf {Current contents of EPDnew} 'Promoters' indicate the number of TSS entries in EPDnew. 'Genes' indicates the number of genes having at least one TSS annotated in EPDnew. 'Genes' indicates the number of protein coding genes contained in the genome annotation (except for nc species). 'nc' stands for non-coding and indicates the long non-coding gene annotations. For 'nc' entries, 'genes' refers to the number of long non-coding genes present in the annotation. In parenthesis are indicated the percentages of genes having a at least one TSS annotated in EPDnew.\relax }}{14}{table.caption.11}} +\newlabel{lab_projects_epd_stats}{{2.1}{14}{\textbf {Current contents of EPDnew} 'Promoters' indicate the number of TSS entries in EPDnew. 'Genes' indicates the number of genes having at least one TSS annotated in EPDnew. 'Genes' indicates the number of protein coding genes contained in the genome annotation (except for nc species). 'nc' stands for non-coding and indicates the long non-coding gene annotations. 
For 'nc' entries, 'genes' refers to the number of long non-coding genes present in the annotation. In parenthesis are indicated the percentages of genes having a at least one TSS annotated in EPDnew.\relax }{table.caption.11}{}} \citation{dreos_mga_2018} \citation{ambrosini_chip-seq_2016} \citation{ambrosini_signal_2003} \citation{dreos_epd_2013} \citation{dreos_eukaryotic_2017} \citation{raney_track_2014} -\@writefile{lof}{\contentsline {figure}{\numberline {2.3}{\ignorespaces \textbf {TSS Mapping precision} Occurrence of the TATA-box \textbf {A} and initiator \textbf {B} around \textit {H.sapiens} TSSs from EPDnew releases (004 and 006) and from a list of gene starts from UCSC Gene list, which was used as input for the generation of the EPDnew collection. This figure was created using Oprof from the SSA server \citep {ambrosini_signal_2003}. Detailed instructions to recreate the figure can be found below.\relax }}{13}{figure.caption.11}} -\newlabel{lab_projects_epd_motifs}{{2.3}{13}{\textbf {TSS Mapping precision} Occurrence of the TATA-box \textbf {A} and initiator \textbf {B} around \textit {H.sapiens} TSSs from EPDnew releases (004 and 006) and from a list of gene starts from UCSC Gene list, which was used as input for the generation of the EPDnew collection. This figure was created using Oprof from the SSA server \citep {ambrosini_signal_2003}. 
Detailed instructions to recreate the figure can be found below.\relax }{figure.caption.11}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.4}Integration of EPDnew with other resources}{13}{subsection.2.2.4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.5}Conclusions}{14}{subsection.2.2.5}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.6}Methods}{14}{subsection.2.2.6}} -\@writefile{toc}{\contentsline {subsubsection}{Motif occurrence profiles}{14}{subsection.2.2.6}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.3}{\ignorespaces \textbf {TSS Mapping precision} Occurrence of the TATA-box \textbf {A} and initiator \textbf {B} around \textit {H.sapiens} TSSs from EPDnew releases (004 and 006) and from a list of gene starts from UCSC Gene list, which was used as input for the generation of the EPDnew collection. This figure was created using Oprof from the SSA server \citep {ambrosini_signal_2003}. Detailed instructions to recreate the figure can be found below.\relax }}{15}{figure.caption.12}} +\newlabel{lab_projects_epd_motifs}{{2.3}{15}{\textbf {TSS Mapping precision} Occurrence of the TATA-box \textbf {A} and initiator \textbf {B} around \textit {H.sapiens} TSSs from EPDnew releases (004 and 006) and from a list of gene starts from UCSC Gene list, which was used as input for the generation of the EPDnew collection. This figure was created using Oprof from the SSA server \citep {ambrosini_signal_2003}. 
Detailed instructions to recreate the figure can be found below.\relax }{figure.caption.12}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.4}Integration of EPDnew with other resources}{15}{subsection.2.2.4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.5}Conclusions}{16}{subsection.2.2.5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2.6}Methods}{16}{subsection.2.2.6}} +\@writefile{toc}{\contentsline {subsubsection}{Motif occurrence profiles}{16}{subsection.2.2.6}} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_pwmscan:_2018} \citation{khan_jaspar_2018} \citation{kulakovskiy_hocomoco:_2018} \citation{ambrosini_pwmscan:_2018} \citation{langmead_ultrafast_2009} \citation{bailey_meme_2009} \citation{ambrosini_pwmscan:_2018} -\@writefile{toc}{\contentsline {section}{\numberline {2.3}PWMScan}{15}{section.2.3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.1}Introduction}{15}{subsection.2.3.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {2.4}{\ignorespaces \textbf {PWMScan workflow :} the input is composed of a PWM and a score threshold specifying the minimum score for a sequence to achieved to be considered as a match. Letter probability matrices or count matrices are also accepted and are converted into PWMs. The score threshold can also be given as a p-value or a percentage of the maximum score, in which case it is converted into a threshold score. Based on the length of the PWM, Bowtie or pwm\_scan can be used to find the matches on the genome. If Bowtie is used, the set of k-mers achieving a better score than the threshold score is computed using branch-and-bound algorithm (mba) and mapped on the genome. On the other hand, if matrix\_scan is used, the PWM is used to score every possible sub-sequence in the genome. The regions corresponding to the sequences achieving a score at least as good as the threshold score are then returned under BED format. 
Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }}{16}{figure.caption.12}} -\newlabel{lab_projects_pwmscan_pipeline}{{2.4}{16}{\textbf {PWMScan workflow :} the input is composed of a PWM and a score threshold specifying the minimum score for a sequence to achieved to be considered as a match. Letter probability matrices or count matrices are also accepted and are converted into PWMs. The score threshold can also be given as a p-value or a percentage of the maximum score, in which case it is converted into a threshold score. Based on the length of the PWM, Bowtie or pwm\_scan can be used to find the matches on the genome. If Bowtie is used, the set of k-mers achieving a better score than the threshold score is computed using branch-and-bound algorithm (mba) and mapped on the genome. On the other hand, if matrix\_scan is used, the PWM is used to score every possible sub-sequence in the genome. The regions corresponding to the sequences achieving a score at least as good as the threshold score are then returned under BED format. Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }{figure.caption.12}{}} +\@writefile{toc}{\contentsline {section}{\numberline {2.3}PWMScan}{17}{section.2.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.1}Introduction}{17}{subsection.2.3.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.4}{\ignorespaces \textbf {PWMScan workflow :} the input is composed of a PWM and a score threshold specifying the minimum score for a sequence to achieved to be considered as a match. Letter probability matrices or count matrices are also accepted and are converted into PWMs. The score threshold can also be given as a p-value or a percentage of the maximum score, in which case it is converted into a threshold score. Based on the length of the PWM, Bowtie or pwm\_scan can be used to find the matches on the genome. 
If Bowtie is used, the set of k-mers achieving a better score than the threshold score is computed using branch-and-bound algorithm (mba) and mapped on the genome. On the other hand, if matrix\_scan is used, the PWM is used to score every possible sub-sequence in the genome. The regions corresponding to the sequences achieving a score at least as good as the threshold score are then returned under BED format. Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }}{18}{figure.caption.13}} +\newlabel{lab_projects_pwmscan_pipeline}{{2.4}{18}{\textbf {PWMScan workflow :} the input is composed of a PWM and a score threshold specifying the minimum score for a sequence to achieved to be considered as a match. Letter probability matrices or count matrices are also accepted and are converted into PWMs. The score threshold can also be given as a p-value or a percentage of the maximum score, in which case it is converted into a threshold score. Based on the length of the PWM, Bowtie or pwm\_scan can be used to find the matches on the genome. If Bowtie is used, the set of k-mers achieving a better score than the threshold score is computed using branch-and-bound algorithm (mba) and mapped on the genome. On the other hand, if matrix\_scan is used, the PWM is used to score every possible sub-sequence in the genome. The regions corresponding to the sequences achieving a score at least as good as the threshold score are then returned under BED format. 
Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }{figure.caption.13}{}} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_chip-seq_2016} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.2}Data and methods}{17}{subsection.2.3.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.2}Data and methods}{19}{subsection.2.3.2}} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_pwmscan:_2018} \citation{ambrosini_pwmscan:_2018} \citation{hertz_identification_1990} \citation{beckstette_fast_2006} \citation{turatsinze_using_2008} \citation{heinz_simple_2010} \citation{grant_fimo:_2011} \citation{pizzi_fast_2008} -\@writefile{lof}{\contentsline {figure}{\numberline {2.5}{\ignorespaces \textbf {Benchmark :} PWMScan speed performances were measured and compared with 6 other well known genome scanners. In all cases, the h19 genome sequence was scanned with a 19bp CTCF matrix and a 11bp STAT1 matrix, 10 times. The run times are represented as boxplots. For PWMScan, both pwm\_scan and Bowtie strategies were run. Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }}{18}{figure.caption.13}} -\newlabel{lab_projects_pwmscan_benchmark}{{2.5}{18}{\textbf {Benchmark :} PWMScan speed performances were measured and compared with 6 other well known genome scanners. In all cases, the h19 genome sequence was scanned with a 19bp CTCF matrix and a 11bp STAT1 matrix, 10 times. The run times are represented as boxplots. For PWMScan, both pwm\_scan and Bowtie strategies were run. 
Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }{figure.caption.13}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.3}Benchmark}{18}{subsection.2.3.3}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.5}{\ignorespaces \textbf {Benchmark :} PWMScan speed performances were measured and compared with 6 other well known genome scanners. In all cases, the h19 genome sequence was scanned with a 19bp CTCF matrix and a 11bp STAT1 matrix, 10 times. The run times are represented as boxplots. For PWMScan, both pwm\_scan and Bowtie strategies were run. Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }}{20}{figure.caption.14}} +\newlabel{lab_projects_pwmscan_benchmark}{{2.5}{20}{\textbf {Benchmark :} PWMScan speed performances were measured and compared with 6 other well known genome scanners. In all cases, the h19 genome sequence was scanned with a 19bp CTCF matrix and a 11bp STAT1 matrix, 10 times. The run times are represented as boxplots. For PWMScan, both pwm\_scan and Bowtie strategies were run. Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }{figure.caption.14}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.3}Benchmark}{20}{subsection.2.3.3}} \citation{schones_statistical_2007} \citation{aerts_toucan:_2003} \citation{fu_motifviz:_2004} \citation{zhao_tred:_2005} -\@writefile{lot}{\contentsline {table}{\numberline {2.2}{\ignorespaces \textbf {Motif scanning software comparison}. The performances of matrix\_scan were assessed by comparing how many of the regions listed by matrix\_scan were also returned by other programs and if the region scores were comparable. For the percentage of overlap with the match list returned by matrix\_scan, the shorter of the two lists always serves as the reference (100\%). For the score correlations with matrix\_scan scores, the Spearman correlation was used. 
Table and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }}{19}{table.caption.14}} -\newlabel{lab_projects_pwmscan_benchmark_table}{{2.2}{19}{\textbf {Motif scanning software comparison}. The performances of matrix\_scan were assessed by comparing how many of the regions listed by matrix\_scan were also returned by other programs and if the region scores were comparable. For the percentage of overlap with the match list returned by matrix\_scan, the shorter of the two lists always serves as the reference (100\%). For the score correlations with matrix\_scan scores, the Spearman correlation was used. Table and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }{table.caption.14}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.4}Conclusions}{20}{subsection.2.3.4}} +\@writefile{lot}{\contentsline {table}{\numberline {2.2}{\ignorespaces \textbf {Motif scanning software comparison}. The performances of matrix\_scan were assessed by comparing how many of the regions listed by matrix\_scan were also returned by other programs and if the region scores were comparable. For the percentage of overlap with the match list returned by matrix\_scan, the shorter of the two lists always serves as the reference (100\%). For the score correlations with matrix\_scan scores, the Spearman correlation was used. Table and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }}{21}{table.caption.15}} +\newlabel{lab_projects_pwmscan_benchmark_table}{{2.2}{21}{\textbf {Motif scanning software comparison}. The performances of matrix\_scan were assessed by comparing how many of the regions listed by matrix\_scan were also returned by other programs and if the region scores were comparable. For the percentage of overlap with the match list returned by matrix\_scan, the shorter of the two lists always serves as the reference (100\%). For the score correlations with matrix\_scan scores, the Spearman correlation was used. 
Table and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }{table.caption.15}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.3.4}Conclusions}{22}{subsection.2.3.4}} \citation{groux_spar-k:_2019} \citation{consortium_integrated_2012} \citation{kundaje_ubiquitous_2012} \citation{nair_probabilistic_2014} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} -\@writefile{toc}{\contentsline {section}{\numberline {2.4}SPar-K}{21}{section.2.4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.1}Introduction}{21}{subsection.2.4.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.2}Methods}{21}{subsection.2.4.2}} -\@writefile{lof}{\contentsline {figure}{\numberline {2.6}{\ignorespaces Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method (Figure \ref {fig_s07}). \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. 
Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{22}{figure.caption.15}} -\newlabel{lab_projects_spark_dnase}{{2.6}{22}{Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method (Figure \ref {fig_s07}). \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.15}{}} +\@writefile{toc}{\contentsline {section}{\numberline {2.4}SPar-K}{23}{section.2.4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.1}Introduction}{23}{subsection.2.4.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.2}Methods}{23}{subsection.2.4.2}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.6}{\ignorespaces Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method (Figure \ref {fig_s07}). \textbf {A.} Input data based on peak summits provided by ENCODE. 
\textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{24}{figure.caption.16}} +\newlabel{lab_projects_spark_dnase}{{2.6}{24}{Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method (Figure \ref {fig_s07}). \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). 
\textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.16}{}} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{groux_spar-k:_2019} \citation{bailey_meme_2009} \citation{groux_spar-k:_2019} \citation{kundaje_ubiquitous_2012} -\@writefile{lof}{\contentsline {figure}{\numberline {2.7}{\ignorespaces \textbf {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. 
Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{23}{figure.caption.16}} -\newlabel{lab_projects_spark_ari}{{2.7}{23}{\textbf {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.16}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {2.8}{\ignorespaces \textbf {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf {A} Seeding done at random, \textbf {B} seeding done at random and outlier smoothing \textbf {C} seeding done with the K-means++ method \textbf {D} seeding done with the K-means++ method and outlier smoothing. 
In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{24}{figure.caption.17}} -\newlabel{lab_projects_spark_sse}{{2.8}{24}{\textbf {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf {A} Seeding done at random, \textbf {B} seeding done at random and outlier smoothing \textbf {C} seeding done with the K-means++ method \textbf {D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.17}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {2.9}{\ignorespaces \textbf {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. 
Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{24}{figure.caption.18}} -\newlabel{lab_projects_spark_time}{{2.9}{24}{\textbf {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.18}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.3}Results}{25}{subsection.2.4.3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.4}Conclusion}{25}{subsection.2.4.4}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.7}{\ignorespaces \textbf {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." 
and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{25}{figure.caption.17}} +\newlabel{lab_projects_spark_ari}{{2.7}{25}{\textbf {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.17}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.8}{\ignorespaces \textbf {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). 
\textbf {A} Seeding done at random, \textbf {B} seeding done at random and outlier smoothing \textbf {C} seeding done with the K-means++ method \textbf {D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{26}{figure.caption.18}} +\newlabel{lab_projects_spark_sse}{{2.8}{26}{\textbf {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf {A} Seeding done at random, \textbf {B} seeding done at random and outlier smoothing \textbf {C} seeding done with the K-means++ method \textbf {D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.18}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.9}{\ignorespaces \textbf {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. 
For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{26}{figure.caption.19}} +\newlabel{lab_projects_spark_time}{{2.9}{26}{\textbf {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. 
Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.19}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.3}Results}{27}{subsection.2.4.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.4.4}Conclusion}{27}{subsection.2.4.4}} \@setckpt{main/ch_group_projects}{ -\setcounter{page}{26} +\setcounter{page}{28} \setcounter{equation}{0} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{2} \setcounter{section}{4} \setcounter{subsection}{4} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{9} \setcounter{table}{2} \setcounter{NAT@ctr}{0} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{0} \setcounter{algocfline}{0} \setcounter{algocfproc}{0} \setcounter{algocf}{0} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/main/ch_introduction.aux b/main/ch_introduction.aux index 019bf51..13d9c77 100644 --- a/main/ch_introduction.aux +++ b/main/ch_introduction.aux @@ -1,84 +1,97 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{mcginty_robert_k._and_tan_song_fundamentals_2014} \citation{mcginty_robert_k._and_tan_song_fundamentals_2014} \@writefile{toc}{\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace 
{10\p@ }} \@writefile{loa}{\addvspace {10\p@ }} \newlabel{intro}{{1}{1}{Introduction}{chapter.1}{}} \@writefile{toc}{\contentsline {chapter}{Introduction}{1}{chapter.1}} \@writefile{toc}{\contentsline {section}{\numberline {1.1}About chromatin}{1}{section.1.1}} \@writefile{toc}{\contentsline {subsection}{\numberline {1.1.1}The chromatin structure}{1}{subsection.1.1.1}} \@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces \textbf {A} Top view of a nucleosome core particle (NCP) displayed as a ribbon representation on the left and space filling representation on the left. The NCP is made of a four hetero-dimers histone octamer around which 146-148 DNA bp wraps. The histone tails protrude out of the nucleosome core particle and are accessible to other factors, unlike the inner part of the histone octamer. Taken and modified from \cite {mcginty_robert_k._and_tan_song_fundamentals_2014}. \textbf {B} The chromatin structure. Inside eukaryotes, DNA is wrapped around histones cores forming nucleosomes. Nucleosomes can then be organized into higher-level helical-like structure, compacting the DNA. The ultimate compaction state is reached at mitosis meta-phase, when the mitotic chromosomes are visible.\relax }}{2}{figure.caption.7}} \providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} \newlabel{intro_chromatin}{{1.1}{2}{\textbf {A} Top view of a nucleosome core particle (NCP) displayed as a ribbon representation on the left and space filling representation on the left. The NCP is made of a four hetero-dimers histone octamer around which 146-148 DNA bp wraps. The histone tails protrude out of the nucleosome core particle and are accessible to other factors, unlike the inner part of the histone octamer. Taken and modified from \cite {mcginty_robert_k._and_tan_song_fundamentals_2014}. \textbf {B} The chromatin structure. Inside eukaryotes, DNA is wrapped around histones cores forming nucleosomes. 
Nucleosomes can then be organized into higher-level helical-like structure, compacting the DNA. The ultimate compaction state is reached at mitosis meta-phase, when the mitotic chromosomes are visible.\relax }{figure.caption.7}{}} \citation{kouzarides_chromatin_2007} \citation{henikoff_histone_2015} \citation{jolma_methods_2011-2,mcginty_robert_k._and_tan_song_fundamentals_2014} \citation{jolma_methods_2011-2} \citation{langst_chromatin_2015} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.2}The chromatin is a dynamic structure}{3}{subsection.1.1.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.2}The chromatin is dynamic}{3}{subsection.1.1.2}} \citation{kouzarides_chromatin_2007,hyun_writing_2017} \citation{hyun_writing_2017,zhou_charting_2011} \citation{hyun_writing_2017} -\citation{} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.3}About nucleosome positioning}{4}{subsection.1.1.3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.4}Measuring nucleosome occupancy}{4}{subsection.1.1.4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.5}About chromatin domains}{4}{subsection.1.1.5}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.6}Regulatory elements}{4}{subsection.1.1.6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.7}Pioneering factors, a special class of TFs}{4}{subsection.1.1.7}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.8}Digital footprinting}{5}{subsection.1.1.8}} -\@writefile{toc}{\contentsline {section}{\numberline {1.2}About transcription factors}{5}{section.1.2}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.1}How chromatin affects TF binding}{5}{subsection.1.2.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.2}Modeling sequence specificity}{5}{subsection.1.2.2}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.3}TF co-binding}{5}{subsection.1.2.3}} -\@writefile{toc}{\contentsline 
{subsection}{\numberline {1.2.4}Measuring TF binding in vivo}{5}{subsection.1.2.4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.5}Measuring TF binding in vitro}{5}{subsection.1.2.5}} -\@writefile{toc}{\contentsline {section}{\numberline {1.3}Data analysis}{5}{section.1.3}} +\citation{jiang_nucleosome_2009} +\citation{jiang_nucleosome_2009} +\citation{kubik_nucleosome_2015} +\citation{west_nucleosomal_2014} +\citation{schones_dynamic_2008,gaffney_controls_2012} +\citation{gaffney_controls_2012} +\citation{fu_insulator_2008} +\citation{jiang_nucleosome_2009} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.3}Measuring nucleosome occupancy}{4}{subsection.1.1.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.4}About nucleosome positioning}{4}{subsection.1.1.4}} +\@writefile{lof}{\contentsline {figure}{\numberline {1.2}{\ignorespaces \textbf {Nucleosome positioning} \textbf {A} Activated gene transcription start site (TSS) region. The nucleosomes located immediately downstream of the TSS show a strong positioning. The positioning of the first nucleosome can be influence by sequence preferences. Eventually the phasing is propagated to neighboring nucleosomes through statistical positioning. The nucleosome array is not anymore visible as the nucleosomes become fuzzily positioned among the cells. \textbf {B} Influence of the rotational positioning on the sequence accessibility. Left, a sequence (indicated by the black \IeC {\textquoteleft }rungs\IeC {\textquoteright } on the DNA helix) has its major groove facing toward the nucleosome outside and is accessible. Center, a 5bp rotation of the nucleosome hides the sequence as its major groove is not facing the histone octamer. Right, another 5bp rotation makes the sequence accessible again. 
Both images are taken and adapted from \citep {jiang_nucleosome_2009}.\relax }}{5}{figure.caption.8}} +\newlabel{intro_nucleosome_positioning}{{1.2}{5}{\textbf {Nucleosome positioning} \textbf {A} Activated gene transcription start site (TSS) region. The nucleosomes located immediately downstream of the TSS show a strong positioning. The positioning of the first nucleosome can be influence by sequence preferences. Eventually the phasing is propagated to neighboring nucleosomes through statistical positioning. The nucleosome array is not anymore visible as the nucleosomes become fuzzily positioned among the cells. \textbf {B} Influence of the rotational positioning on the sequence accessibility. Left, a sequence (indicated by the black ‘rungs’ on the DNA helix) has its major groove facing toward the nucleosome outside and is accessible. Center, a 5bp rotation of the nucleosome hides the sequence as its major groove is not facing the histone octamer. Right, another 5bp rotation makes the sequence accessible again. 
Both images are taken and adapted from \citep {jiang_nucleosome_2009}.\relax }{figure.caption.8}{}} +\citation{dreos_influence_2016} +\citation{jiang_nucleosome_2009} +\citation{jiang_nucleosome_2009} +\citation{trifonov_cracking_2011} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.5}About chromatin domains}{6}{subsection.1.1.5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.6}Regulatory elements}{7}{subsection.1.1.6}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.7}Pioneering factors, a special class of TFs}{7}{subsection.1.1.7}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.8}Digital footprinting}{7}{subsection.1.1.8}} +\@writefile{toc}{\contentsline {section}{\numberline {1.2}About transcription factors}{7}{section.1.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.1}How chromatin affects TF binding}{7}{subsection.1.2.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.2}Modeling sequence specificity}{7}{subsection.1.2.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.3}TF co-binding}{7}{subsection.1.2.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.4}Measuring TF binding in vivo}{7}{subsection.1.2.4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.5}Measuring TF binding in vitro}{8}{subsection.1.2.5}} +\@writefile{toc}{\contentsline {section}{\numberline {1.3}Data analysis}{8}{section.1.3}} \@setckpt{main/ch_introduction}{ -\setcounter{page}{7} +\setcounter{page}{9} \setcounter{equation}{0} \setcounter{enumi}{0} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{1} \setcounter{section}{3} \setcounter{subsection}{0} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} -\setcounter{figure}{1} +\setcounter{figure}{2} \setcounter{table}{0} \setcounter{NAT@ctr}{0} 
\setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{0} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{0} \setcounter{algocfline}{0} \setcounter{algocfproc}{0} \setcounter{algocf}{0} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/main/ch_introduction.tex b/main/ch_introduction.tex index 4fc5a92..fa4d28f 100644 --- a/main/ch_introduction.tex +++ b/main/ch_introduction.tex @@ -1,129 +1,150 @@ \cleardoublepage \chapter{Introduction} \label{intro} \markboth{Introduction}{Introduction} \addcontentsline{toc}{chapter}{Introduction} Each living organism contains DNA which is the molecular support on which genes are encoded. Genes are the hereditary unit of life and code for a set of instructions involved in all the aspects of life, from an organism development to the functions of a specific cell type. However, since all these instructions are not needed at the same time, gene expression needs to be regulated. Transcription factors (TFs) form an important class of nuclear proteins that can bind to specific DNA sequences and drive target gene expression. Thus, in order to control gene expression, the activity of TFs needs to be tightly regulated. This work report the results of different computational studies, all focuses on TFs binding sites, and explore regulatory regions of the genome, TF sequence specificity and the organization of chromatin around TF binding sites. \section{About chromatin} In eukaryotes, the DNA is stored in the nucleus. In human, each cell contains about two meters of DNA. 
In order to fit the DNA inside the nucleus, the cells have to organize and compact the genome while keeping it readable. Unbeatable, evolution came up with an elegant solution : the chromatin. The chromatin is the association of the DNA with specialized proteins - the histones - around which it wraps, resulting in genome compaction. The histone/DNA structure is also associated with other protein families such as RNA/DNA polymerases, helicases and TFs, which together form the chromatin. % 1) structure, histones, nucleosomes, genome compaction % 2) nucleosome positioning, statistical positioning, sequence positioning % 3) measuring nucleosome occupancy (MNase-seq) % 4) histone PTMs, euchromatin, heterochromatin, TAD % 5) regulatory regions (TF binding, NDR, open chromatin) % 6) DGF / DNase-seq / ATAC-seq, footprint % 7) pioneering TF \subsection{The chromatin structure} % structure, histones, nucleosomes, genome compaction -\begin{figure}[!htbp] +\begin{figure} \begin{center} \includegraphics[scale=0.2]{images/ch_introduction/chromatin.png} \captionof{figure}{\textbf{A} Top view of a nucleosome core particle (NCP) displayed as a ribbon representation on the left and space filling representation on the right. The NCP is made of a four hetero-dimers histone octamer around which 146--148 bp of DNA wrap. The histone tails protrude out of the nucleosome core particle and are accessible to other factors, unlike the inner part of the histone octamer. Taken and modified from \cite{mcginty_robert_k._and_tan_song_fundamentals_2014}. \textbf{B} The chromatin structure. Inside eukaryotes, DNA is wrapped around histone cores forming nucleosomes. Nucleosomes can then be organized into higher-level helical-like structures, compacting the DNA. The ultimate compaction state is reached at mitotic metaphase, when the mitotic chromosomes are visible.} \label{intro_chromatin} \end{center} \end{figure} % histones -In human, there are four major (canonical) histones : H2A, H2B, H3 and H4.
These four histones are found assembled together into an octamer, composed of two H2A/H2B and two H3/H4 hetero-dimers, around which ~146/8bp of DNA wrap (Figure \ref{intro_chromatin}A). This structure is called the nucleosome core particule (that I will later simply refer to as "nucleosome"). +In humans, there are four major (canonical) histones : H2A, H2B, H3 and H4. These four histones are found assembled together into an octamer, composed of two H2A/H2B and two H3/H4 hetero-dimers, around which $\sim$146--148\,bp of DNA wrap (Figure~\ref{intro_chromatin}A), forming the nucleosome core particle (that I will later simply refer to as ``nucleosome''). The DNA is kept wrapped around the histone octamer because of strong electrostatic interactions. Indeed, the DNA backbone, which is negatively charged in nuclear conditions, shows a high affinity for the positively charged histones. As a consequence, the nucleosome is quite a stable structure. The histone proteins are highly conserved among eukaryotes at both the sequence and the structure level. All the histones share the same overall design. They are composed of an N-terminal tail, a central histone-fold domain and a C-terminal tail. Histones associate with each other through their histone-fold domains, which compose the center of the nucleosome. In contrast, the histone N-terminal tails extrude out from the nucleosome and are hotspots for post-translational modifications (PTMs) \citep{kouzarides_chromatin_2007}. For completeness, it should be mentioned that ``variant histones'' - also called ``replacement histones'', as opposed to the ``canonical replicative histones'' - exist and can replace canonical histones in nucleosomes, at specific genome locations, to fulfill dedicated functions \citep{henikoff_histone_2015}. However, this topic is outside the scope of this work. % chromatin fibers The genome is organized into a repetition of nucleosomes, each separated by a linker DNA, forming the 11-nm chromatin fiber.
Under this conformation, the chromatin is quite relaxed and the DNA accessible. The 11-nm fiber is itself stored in a more compacted structure called the 30-nm fiber (Figure~\ref{intro_chromatin}B). Under the inclusion of the H1 linker histone, the nucleosome arrays are organized into a higher-level structure, diminishing the linker DNA accessibility and further increasing the genome compaction level \citep{jolma_methods_2011-2, mcginty_robert_k._and_tan_song_fundamentals_2014}. % compaction It is now commonly accepted that the compaction of the genome comes with a trade-off. The DNA sequences found in nucleosomes are thought to be inaccessible for DNA reading processes such as TF binding whereas the linker DNA remains accessible \citep{jolma_methods_2011-2}. Thus storing the genome impedes its readability. Because transcribing genes is all about reading the DNA template, the state of the chromatin eventually impacts gene expression. Consequently, the cell faces a situation where it needs to keep only the immediately useful genomic regions readable while keeping the ability to open/close other regions on demand. -\subsection{The chromatin is a dynamic structure} +\subsection{The chromatin is dynamic} % chromatin modification/remodelling -Because the cell needs may vary, for instance because of lineage commitment, the chromatin structure needs to be adapted. Some regions needs to become accessible in order to be read while other are not needed anymore. Consequently, the chromatin is a highly dynamic structure that undergoes constant modifications. Two broad families of chromatin modifier complexes exist : ATPase chromatin remodelers and histone modifiers. +Because the cell's needs may vary, for instance because of lineage commitment, the chromatin structure needs to be adapted. Some regions need to become accessible in order to be read while others are not needed anymore. Consequently, the chromatin is a highly dynamic structure that undergoes constant modifications.
Two broad families of chromatin modifiers exist : ATPase chromatin remodelers and histone modifiers. % chromatin remodelers -ATPase chromatin remodelers is a group of proteins that are able to affect the chromatin packaging by interfering directly with the nucleosome, at the cost of hydrolyzing ATP molecules. Chromatin remodelers can be subdivided into 4 sub-groups, each fulfilling a different function \citep{langst_chromatin_2015}. SWI/SNF members can slide and/or to evict nucleosomes from DNA and are linked with chromatin opening. ISWI members tend to recognize unmodified H4 histone and catalyze nucleosome spacing and chromatin compaction. CHD members are less well functionally characterized but bear chromo domains that allows them to recognize histone methylation. Finally, INO80 seems to be able to slide and evict nucleosomes and to recognize Hollidays junction and the DNA replication fork, suggesting a role for DNA repair and replication. +ATPase chromatin remodelers are a group of proteins that are able to affect the chromatin packaging by interfering directly with the nucleosome, at the cost of hydrolyzing ATP molecules. Chromatin remodelers can be subdivided into 4 sub-groups, each fulfilling a different function \citep{langst_chromatin_2015}. SWI/SNF members can slide and/or evict nucleosomes from DNA and are linked with chromatin opening. ISWI members tend to recognize unmodified H4 histone and catalyze nucleosome spacing and chromatin compaction. CHD members are less well functionally characterized but bear chromo domains that allow them to recognize histone methylation. Finally, INO80 members seem to be able to slide and evict nucleosomes and to recognize Holliday junctions and the DNA replication fork, suggesting a role in DNA repair and replication. % histone modifiers Histone modifiers are enzymes that can deposit PTMs on the histone tails. Different types of PTMs exist, such as acetylation or methylation.
Each histone has several residues that can be modified, sometimes together. This leads to an astonishingly high number of possibilities. So far more than a hundred histone PTMs have been identified, each linked with different biological functions. If the deposition of PTMs is made by dedicated factors (also called writers), this is also true for their recognition \citep{kouzarides_chromatin_2007,hyun_writing_2017}. The direct consequence is that histone PTMs are used to recruit specific factors at given genomic locations. For instance, H3 lysine 4 di-methylation (H3K4me2) has been shown to be enriched at the promoters of actively transcribed genes and at enhancers \citep{hyun_writing_2017,zhou_charting_2011} and to be specifically recognized by CHD1, a member of the CHD chromatin remodelers \citep{hyun_writing_2017}. +\subsection{Measuring nucleosome occupancy} +MNase-seq + \subsection{About nucleosome positioning} -statistical positioning, sequence positioning +% statistical positioning, sequence positioning + +\begin{figure} +\begin{center} + \includegraphics[scale=0.2]{images/ch_introduction/nucleosome_positioning.png} + \captionof{figure}{\textbf{Nucleosome positioning} \textbf{A} Activated gene transcription start site (TSS) region. The nucleosomes located immediately downstream of the TSS show a strong positioning. The positioning of the first nucleosome can be influenced by sequence preferences. Eventually the phasing is propagated to neighboring nucleosomes through statistical positioning. The nucleosome array is no longer visible as the nucleosomes become fuzzily positioned among the cells. \textbf{B} Influence of the rotational positioning on the sequence accessibility. Left, a sequence (indicated by the black ‘rungs’ on the DNA helix) has its major groove facing toward the nucleosome outside and is accessible. Center, a 5bp rotation of the nucleosome hides the sequence as its major groove is now facing the histone octamer.
Right, another 5bp rotation makes the sequence accessible again. Both images are taken and adapted from \citep{jiang_nucleosome_2009}.} +\label{intro_nucleosome_positioning} +\end{center} +\end{figure} + +The advent of MNase-seq made it possible to draw high-resolution maps of nucleosome occupancy in many species, for instance in yeast \citep{kubik_nucleosome_2015}, mouse \citep{west_nucleosomal_2014} and human \citep{schones_dynamic_2008, gaffney_controls_2012}. + +% strongly positioned nucleosomes +The wealth of data collected made it possible to determine that nucleosomes do not package the genome by covering it uniformly. Nucleosomes rather seem to have preferred locations where they sit. Interestingly, single nucleosomes can be visualized from batch sequencing experiments, indicating that an important fraction of the cells all bear a nucleosome at the same position. In these cases, the nucleosome is said to be ``phased'' or ``strongly positioned'' (see Figure~\ref{intro_nucleosome_positioning}A). + +% statistical positioning +Nucleosome arrays are a striking case of strongly positioned nucleosomes. Arrays can be seen throughout the human genome \citep{gaffney_controls_2012}. However, there are regions where they are enriched, for instance at the CCCTC-binding factor (CTCF) binding sites \citep{fu_insulator_2008}. In this case, it has been proposed that the arrays result from the nucleosomes organizing with respect to a barrier (or anchor). In this case, the barrier is CTCF. The regular array organization has been proposed to be propagated far from its anchor because the positions of the immediately flanking nucleosomes are constrained by the barrier. In turn, these nucleosomes constrain the lateral freedom of movement of the following ones, and so on. Eventually, an array is formed. However, the degree of constraint diminishes with each new nucleosome, leading the signal to blur out at some point.
There, the nucleosomes are no longer sufficiently phased throughout the cell population. This model is called ``statistical positioning'' \citep{jiang_nucleosome_2009}. + +% effect of sequence +Another important driver of nucleosome positioning is the DNA sequence. For instance, strongly positioned nucleosomes are also visible at the transcription start sites (TSSs) of activated genes. In this case, the DNA sequence composition seems to be a major factor influencing the nucleosome positioning \citep{dreos_influence_2016}. Because the DNA is wrapped around the histone octamer, the bound DNA is required to be bendable. WW (W=A/T) and SS (S=C/G) dinucleotides have been shown to curve the DNA by extending the major and the minor groove, respectively \citep{jiang_nucleosome_2009}. However, because the major and minor grooves precess around the DNA helix axis, each groove alternately faces the nucleosome center (the histone octamer) and the nucleosome outside (the opposite direction) every $\sim$5~bp (since the DNA helix periodicity is $\sim$10.4~bp, see Figure~\ref{intro_nucleosome_positioning}B). Consequently, dinucleotides favoring DNA flexibility are required to occur at different locations around the nucleosome, according to their effect on the DNA helix structure. For instance, stretching the major groove needs to occur when it is facing the nucleosome outside, to force the adjacent DNA segment to be curved toward (around) the nucleosome center. This is referred to as ``rotational positioning'' \citep{jiang_nucleosome_2009}. Interestingly, if a nucleosome is bound to a favorable sequence, the next most likely favorable binding sites are located 10~bp upstream or downstream. These correspond to the locations at which all the dinucleotides will reacquire the same orientation with respect to the histone octamer. However, any other nucleosome displacement will modify the orientations of the sequences wrapped around.
This has the potential of making them accessible---if they are oriented toward the nucleosome outside---or hidden---if they are facing the nucleosome core (Figure~\ref{intro_nucleosome_positioning}B). In 2011, Trifonov identified the YRRRRRYYYYYR (where R=A/G and Y=C/T) consensus sequence to be a nucleosome positioning sequence matching these criteria \citep{trifonov_cracking_2011}. The first and last positions indicate the cyclic nature of this pattern. + +In vivo, both statistical and rotational positioning occur. Additionally, chromatin remodelers constantly catalyze thermodynamically unfavorable nucleosome displacement reactions at the expense of ATP hydrolysis. It is likely that each nucleosome is subjected to all of these phenomena. However, one may be predominant over the others. -\subsection{Measuring nucleosome occupancy} -MNAase-seq \subsection{About chromatin domains} histone PTMs, euchromatin, heterochromatin, TAD -\citep{} \subsection{Regulatory elements} TF binding, motifs, NDR, open chromatin \subsection{Pioneering factors, a special class of TFs} pioneering TFs \subsection{Digital footprinting} DGF / DNase-seq / ATAC-seq, footprint \section{About transcription factors} Transcription factors (TFs) are a special class of proteins that possess a DNA binding domain (DBD). This DBD allows them to recognize specific DNA sequences and to selectively bind them.
Another few words about TF sequence specificity / affinity 1) specificity models, additivity, sequence scoring given model 2) TF complexes 3) co-binding scenarios 4) in vivo (ChIP-seq) 5) in vitro (HT-SELEX, PBM, B1H) \subsection{How chromatin affects TF binding} Jolma and Taipale book 2011 chapter 9 Jolma and Taipale book 2011 chapter 11 \subsection{Modeling sequence specificity} models, additivity, sequence scoring given model \subsection{TF co-binding} TF complexe, homo-dimer, hetero-dimers, independent co-binding Jolma and Taipale book 2011 chapter 8 Jolman and Taipale book 2011 chapter 11.4 (nucleosome breathing / TFs cooperate to evict nucleosome and open chromatin) \subsection{Measuring TF binding in vivo} ChIP-seq \subsection{Measuring TF binding in vitro} HT-SELEX, PBM, B1H \section{Data analysis} current technologies limitations cannot assess exactly the binding site, both using ChIP-seq or in vitro measures -> need to realign the data -> need to reorient the data Gaffney et al and ArchAlign proposed realignment procedures for MNase-seq Lawrence and Rilley proposed realignment procedure for sequences MEME as well both cases, data heterogeneity is captured -> clustering to resolve this, list some methods (NOT MINE) diff --git a/main/ch_smile-seq.aux b/main/ch_smile-seq.aux index f94d43e..c6478e5 100644 --- a/main/ch_smile-seq.aux +++ b/main/ch_smile-seq.aux @@ -1,85 +1,85 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{isakova_smile-seq_2017} \citation{isakova_smile-seq_2017} \citation{isakova_smile-seq_2017} \citation{maerkl_systems_2007} \citation{berger_universal_2009} \citation{zhao_inferring_2009,jolma_multiplexed_2010} \citation{isakova_smile-seq_2017} -\@writefile{toc}{\contentsline {chapter}{\numberline {4}SMiLE-seq data analysis}{53}{chapter.4}} +\@writefile{toc}{\contentsline {chapter}{\numberline {4}SMiLE-seq data analysis}{55}{chapter.4}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} 
\@writefile{loa}{\addvspace {10\p@ }} -\newlabel{smile_seq}{{4}{53}{SMiLE-seq data analysis}{chapter.4}{}} -\@writefile{toc}{\contentsline {chapter}{SMiLE-seq data analysis}{53}{chapter.4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.1}Introduction}{53}{subsection.4.0.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces \textbf {SMiLE-seq pipeline :} \textbf {a} Schematic representation of the experimental setup. A snapshot of three units of the microfluidic device is shown. In vitro transcribed and translated bait TF, target dsDNA, and a nonspecific competitor poly-dIdC are mixed and pipetted in one of the wells of the microfluidic device. The mixtures are then passively pumped in the device (bottom panel). Newly formed TF\IeC {\textendash }DNA complexes are trapped under a flexible polydimethylsiloxane membrane, and unbound molecules as well as molecular complexes are washed away (upper panel). Left, schematic representation of three individual chambers. Right, corresponding snapshots of an individual chamber taken before and after mechanical trapping. \textbf {b} Data processing pipeline. The bound DNA is eluted from all the units of the device simultaneously and collected in one tube. Recovered DNA is amplified and sequenced. The sequencing reads are then demultiplexed, and a seed sequence is identified for each sample. This seed is then used to initialize a probability matrix representing the sequence specificity model for the given TF. The model parameters are then optimized using a Hidden Markov Model-based motif discovery pipeline. Figure and legend taken and adapted from \citep {isakova_smile-seq_2017}.\relax }}{54}{figure.caption.28}} -\newlabel{smile_seq_pipeline}{{4.1}{54}{\textbf {SMiLE-seq pipeline :} \textbf {a} Schematic representation of the experimental setup. A snapshot of three units of the microfluidic device is shown. 
In vitro transcribed and translated bait TF, target dsDNA, and a nonspecific competitor poly-dIdC are mixed and pipetted in one of the wells of the microfluidic device. The mixtures are then passively pumped in the device (bottom panel). Newly formed TF–DNA complexes are trapped under a flexible polydimethylsiloxane membrane, and unbound molecules as well as molecular complexes are washed away (upper panel). Left, schematic representation of three individual chambers. Right, corresponding snapshots of an individual chamber taken before and after mechanical trapping. \textbf {b} Data processing pipeline. The bound DNA is eluted from all the units of the device simultaneously and collected in one tube. Recovered DNA is amplified and sequenced. The sequencing reads are then demultiplexed, and a seed sequence is identified for each sample. This seed is then used to initialize a probability matrix representing the sequence specificity model for the given TF. The model parameters are then optimized using a Hidden Markov Model-based motif discovery pipeline. Figure and legend taken and adapted from \citep {isakova_smile-seq_2017}.\relax }{figure.caption.28}{}} +\newlabel{smile_seq}{{4}{55}{SMiLE-seq data analysis}{chapter.4}{}} +\@writefile{toc}{\contentsline {chapter}{SMiLE-seq data analysis}{55}{chapter.4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.1}Introduction}{55}{subsection.4.0.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces \textbf {SMiLE-seq pipeline :} \textbf {a} Schematic representation of the experimental setup. A snapshot of three units of the microfluidic device is shown. In vitro transcribed and translated bait TF, target dsDNA, and a nonspecific competitor poly-dIdC are mixed and pipetted in one of the wells of the microfluidic device. The mixtures are then passively pumped in the device (bottom panel). 
Newly formed TF\IeC {\textendash }DNA complexes are trapped under a flexible polydimethylsiloxane membrane, and unbound molecules as well as molecular complexes are washed away (upper panel). Left, schematic representation of three individual chambers. Right, corresponding snapshots of an individual chamber taken before and after mechanical trapping. \textbf {b} Data processing pipeline. The bound DNA is eluted from all the units of the device simultaneously and collected in one tube. Recovered DNA is amplified and sequenced. The sequencing reads are then demultiplexed, and a seed sequence is identified for each sample. This seed is then used to initialize a probability matrix representing the sequence specificity model for the given TF. The model parameters are then optimized using a Hidden Markov Model-based motif discovery pipeline. Figure and legend taken and adapted from \citep {isakova_smile-seq_2017}.\relax }}{56}{figure.caption.29}} +\newlabel{smile_seq_pipeline}{{4.1}{56}{\textbf {SMiLE-seq pipeline :} \textbf {a} Schematic representation of the experimental setup. A snapshot of three units of the microfluidic device is shown. In vitro transcribed and translated bait TF, target dsDNA, and a nonspecific competitor poly-dIdC are mixed and pipetted in one of the wells of the microfluidic device. The mixtures are then passively pumped in the device (bottom panel). Newly formed TF–DNA complexes are trapped under a flexible polydimethylsiloxane membrane, and unbound molecules as well as molecular complexes are washed away (upper panel). Left, schematic representation of three individual chambers. Right, corresponding snapshots of an individual chamber taken before and after mechanical trapping. \textbf {b} Data processing pipeline. The bound DNA is eluted from all the units of the device simultaneously and collected in one tube. Recovered DNA is amplified and sequenced. 
The sequencing reads are then demultiplexed, and a seed sequence is identified for each sample. This seed is then used to initialize a probability matrix representing the sequence specificity model for the given TF. The model parameters are then optimized using a Hidden Markov Model-based motif discovery pipeline. Figure and legend taken and adapted from \citep {isakova_smile-seq_2017}.\relax }{figure.caption.29}{}} \citation{isakova_smile-seq_2017} \citation{isakova_smile-seq_2017} \citation{weirauch_evaluation_2013} -\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces \textbf {Example of a Hidden Markov model :} initial HMM representation with a seed sequence 'ATGCC'. The upper Markov chain models + strand motif containing sequences, the middle one - strand motif containing sequences and the lower zero motif occurrence sequences. The FB, FE, RB and RE positions represents positions in the sequence that occur before and after the binding site on the forward and reverse strand. For these nodes, a self transition exist to allow the binding site to occur at a variable distance from the beginning and the end of the sequence. Once transiting toward the 1st position of the binding site, the next transition is forced toward the 2nd position in the binding site, and so on until the end of the binding site. The + strand and - strand Markov chains emission parameters are paired together (they have the same values), as represented by the grey dashed lines. The transition probabilities in red are not subjected to the Baum-Welch training. Finally, a binding model represented as a probability matrix is composed of the emission probabilities at the binding site positions. Figure and legend taken and adapted from \citep {isakova_smile-seq_2017}\relax }}{55}{figure.caption.29}} -\newlabel{smile_seq_hmm}{{4.2}{55}{\textbf {Example of a Hidden Markov model :} initial HMM representation with a seed sequence 'ATGCC'. 
The upper Markov chain models + strand motif containing sequences, the middle one - strand motif containing sequences and the lower zero motif occurrence sequences. The FB, FE, RB and RE positions represents positions in the sequence that occur before and after the binding site on the forward and reverse strand. For these nodes, a self transition exist to allow the binding site to occur at a variable distance from the beginning and the end of the sequence. Once transiting toward the 1st position of the binding site, the next transition is forced toward the 2nd position in the binding site, and so on until the end of the binding site. The + strand and - strand Markov chains emission parameters are paired together (they have the same values), as represented by the grey dashed lines. The transition probabilities in red are not subjected to the Baum-Welch training. Finally, a binding model represented as a probability matrix is composed of the emission probabilities at the binding site positions. Figure and legend taken and adapted from \citep {isakova_smile-seq_2017}\relax }{figure.caption.29}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.2}Hidden Markov Model Motif discovery}{55}{subsection.4.0.2}} -\newlabel{section_smileseq_hmm}{{4.0.2}{55}{Hidden Markov Model Motif discovery}{subsection.4.0.2}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces \textbf {Example of a Hidden Markov model :} initial HMM representation with a seed sequence 'ATGCC'. The upper Markov chain models + strand motif containing sequences, the middle one - strand motif containing sequences and the lower zero motif occurrence sequences. The FB, FE, RB and RE positions represents positions in the sequence that occur before and after the binding site on the forward and reverse strand. For these nodes, a self transition exist to allow the binding site to occur at a variable distance from the beginning and the end of the sequence. 
Once transiting toward the 1st position of the binding site, the next transition is forced toward the 2nd position in the binding site, and so on until the end of the binding site. The + strand and - strand Markov chains emission parameters are paired together (they have the same values), as represented by the grey dashed lines. The transition probabilities in red are not subjected to the Baum-Welch training. Finally, a binding model represented as a probability matrix is composed of the emission probabilities at the binding site positions. Figure and legend taken and adapted from \citep {isakova_smile-seq_2017}\relax }}{57}{figure.caption.30}} +\newlabel{smile_seq_hmm}{{4.2}{57}{\textbf {Example of a Hidden Markov model :} initial HMM representation with a seed sequence 'ATGCC'. The upper Markov chain models + strand motif containing sequences, the middle one - strand motif containing sequences and the lower zero motif occurrence sequences. The FB, FE, RB and RE positions represents positions in the sequence that occur before and after the binding site on the forward and reverse strand. For these nodes, a self transition exist to allow the binding site to occur at a variable distance from the beginning and the end of the sequence. Once transiting toward the 1st position of the binding site, the next transition is forced toward the 2nd position in the binding site, and so on until the end of the binding site. The + strand and - strand Markov chains emission parameters are paired together (they have the same values), as represented by the grey dashed lines. The transition probabilities in red are not subjected to the Baum-Welch training. Finally, a binding model represented as a probability matrix is composed of the emission probabilities at the binding site positions. 
Figure and legend taken and adapted from \citep {isakova_smile-seq_2017}\relax }{figure.caption.30}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.2}Hidden Markov Model Motif discovery}{57}{subsection.4.0.2}} +\newlabel{section_smileseq_hmm}{{4.0.2}{57}{Hidden Markov Model Motif discovery}{subsection.4.0.2}{}} \citation{schutz_mamot:_2008} \citation{orenstein_comparative_2014} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.3}Binding motif evaluation}{56}{subsection.4.0.3}} -\newlabel{section_smileseq_pwmeval}{{4.0.3}{56}{Binding motif evaluation}{subsection.4.0.3}{}} -\newlabel{smile_seq_pwmeval_score}{{4.1}{57}{Binding motif evaluation}{equation.4.0.1}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.3}Binding motif evaluation}{58}{subsection.4.0.3}} +\newlabel{section_smileseq_pwmeval}{{4.0.3}{58}{Binding motif evaluation}{subsection.4.0.3}{}} +\newlabel{smile_seq_pwmeval_score}{{4.1}{59}{Binding motif evaluation}{equation.4.0.1}{}} \citation{jolma_dna-binding_2013} \citation{mathelier_jaspar_2014} \citation{kulakovskiy_hocomoco:_2016} -\newlabel{smile_seq_algo_auc}{{2}{58}{Binding motif evaluation}{algocfline.2}{}} -\@writefile{loa}{\contentsline {algocf}{\numberline {2}{\ignorespaces Computes the AUC-ROC\relax }}{58}{algocf.2}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.4}Results}{58}{subsection.4.0.4}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces \textbf {Predictive power of SMiLE-seq :} \textbf {A} the motifs compared to that of previously reported motifs that are retrievable from the indicated databases. For each motif, the AUC-ROC values on the 500 top peaks of the ENCODE ChIP-seq data sets for the corresponding TF was computed. The heatmap represents the AUC values computed for each method on the respective ChIP-seq data sets that were selected based on the highest mean AUC values among all five models. 
\textbf {B} the predictive performances of MAX and YY1 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }}{59}{figure.caption.30}} -\newlabel{smileseq_auc}{{4.3}{59}{\textbf {Predictive power of SMiLE-seq :} \textbf {A} the motifs compared to that of previously reported motifs that are retrievable from the indicated databases. For each motif, the AUC-ROC values on the 500 top peaks of the ENCODE ChIP-seq data sets for the corresponding TF was computed. The heatmap represents the AUC values computed for each method on the respective ChIP-seq data sets that were selected based on the highest mean AUC values among all five models. \textbf {B} the predictive performances of MAX and YY1 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }{figure.caption.30}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.5}Conclusions}{60}{subsection.4.0.5}} +\newlabel{smile_seq_algo_auc}{{2}{60}{Binding motif evaluation}{algocfline.2}{}} +\@writefile{loa}{\contentsline {algocf}{\numberline {2}{\ignorespaces Computes the AUC-ROC\relax }}{60}{algocf.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.4}Results}{60}{subsection.4.0.4}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.3}{\ignorespaces \textbf {Predictive power of SMiLE-seq :} \textbf {A} the motifs compared to that of previously reported motifs that are retrievable from the indicated databases. 
For each motif, the AUC-ROC values on the 500 top peaks of the ENCODE ChIP-seq data sets for the corresponding TF was computed. The heatmap represents the AUC values computed for each method on the respective ChIP-seq data sets that were selected based on the highest mean AUC values among all five models. \textbf {B} the predictive performances of MAX and YY1 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }}{61}{figure.caption.31}} +\newlabel{smileseq_auc}{{4.3}{61}{\textbf {Predictive power of SMiLE-seq :} \textbf {A} the motifs compared to that of previously reported motifs that are retrievable from the indicated databases. For each motif, the AUC-ROC values on the 500 top peaks of the ENCODE ChIP-seq data sets for the corresponding TF was computed. The heatmap represents the AUC values computed for each method on the respective ChIP-seq data sets that were selected based on the highest mean AUC values among all five models. \textbf {B} the predictive performances of MAX and YY1 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. 
The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }{figure.caption.31}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.0.5}Conclusions}{62}{subsection.4.0.5}} \@setckpt{main/ch_smile-seq}{ -\setcounter{page}{61} +\setcounter{page}{63} \setcounter{equation}{1} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{4} \setcounter{section}{0} \setcounter{subsection}{5} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{3} \setcounter{table}{0} \setcounter{NAT@ctr}{0} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{17} \setcounter{algocfline}{2} \setcounter{algocfproc}{2} \setcounter{algocf}{2} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/my_thesis.aux b/my_thesis.aux index e95d7ee..87a64b4 100644 --- a/my_thesis.aux +++ b/my_thesis.aux @@ -1,170 +1,170 @@ \relax \providecommand\hyper@newdestlabel[2]{} \providecommand\BKM@entry[2]{} \catcode `:\active \catcode `;\active \catcode `!\active \catcode `?\active \catcode `"\active \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined \global\let\oldcontentsline\contentsline \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} \global\let\oldnewlabel\newlabel 
\gdef\newlabel#1#2{\newlabelxx{#1}#2} \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} \AtEndDocument{\ifx\hyper@anchor\@undefined \let\contentsline\oldcontentsline \let\newlabel\oldnewlabel \fi} \fi} \global\let\hyper@last\relax \gdef\HyperFirstAtBeginDocument#1{#1} \providecommand\HyField@AuxAddToFields[1]{} \providecommand\HyField@AuxAddToCoFields[2]{} \providecommand \oddpage@label [2]{} \babel@aux{english}{} \babel@aux{french}{} \babel@aux{english}{} \@input{head/dedication.aux} \@input{head/acknowledgements.aux} \BKM@entry{id=1,dest={636861707465722A2E31},srcline={3}}{41636B6E6F776C656467656D656E7473} \pgfsyspdfmark {pgfid2}{0}{40463552} \pgfsyspdfmark {pgfid1}{6}{40498788} \@input{head/preface.aux} \BKM@entry{id=2,dest={636861707465722A2E32},srcline={4}}{50726566616365} \pgfsyspdfmark {pgfid4}{0}{40463552} \pgfsyspdfmark {pgfid3}{6}{40498788} \@input{head/abstracts.aux} \BKM@entry{id=3,dest={636861707465722A2E33},srcline={9}}{4162737472616374205C28456E676C6973682F4672616E5C3334376169732F446575747363685C29} \pgfsyspdfmark {pgfid6}{0}{40463552} \pgfsyspdfmark {pgfid5}{6}{40498788} \pgfsyspdfmark {pgfid8}{0}{40463552} \pgfsyspdfmark {pgfid7}{6}{40498788} \pgfsyspdfmark {pgfid10}{0}{40463552} \pgfsyspdfmark {pgfid9}{6}{40498788} \BKM@entry{id=4,dest={746F632E30},srcline={30}}{436F6E74656E7473} \pgfsyspdfmark {pgfid12}{0}{40463552} \pgfsyspdfmark {pgfid11}{6}{40498788} \@input{main/ch_introduction.aux} \BKM@entry{id=5,dest={636861707465722E31},srcline={2}}{496E74726F64756374696F6E} \BKM@entry{id=6,dest={636861707465722E31},srcline={5}}{496E74726F64756374696F6E} \BKM@entry{id=7,dest={73656374696F6E2E312E31},srcline={14}}{41626F7574206368726F6D6174696E} \BKM@entry{id=8,dest={73756273656374696F6E2E312E312E31},srcline={26}}{546865206368726F6D6174696E20737472756374757265} \pgfsyspdfmark {pgfid14}{0}{40463552} \pgfsyspdfmark {pgfid13}{6}{40511883} 
-\BKM@entry{id=9,dest={73756273656374696F6E2E312E312E32},srcline={50}}{546865206368726F6D6174696E20697320612064796E616D696320737472756374757265} -\BKM@entry{id=10,dest={73756273656374696F6E2E312E312E33},srcline={61}}{41626F7574206E75636C656F736F6D6520706F736974696F6E696E67} -\BKM@entry{id=11,dest={73756273656374696F6E2E312E312E34},srcline={64}}{4D6561737572696E67206E75636C656F736F6D65206F63637570616E6379} -\BKM@entry{id=12,dest={73756273656374696F6E2E312E312E35},srcline={67}}{41626F7574206368726F6D6174696E20646F6D61696E73} -\BKM@entry{id=13,dest={73756273656374696F6E2E312E312E36},srcline={72}}{526567756C61746F727920656C656D656E7473} -\BKM@entry{id=14,dest={73756273656374696F6E2E312E312E37},srcline={75}}{50696F6E656572696E6720666163746F72732C2061207370656369616C20636C617373206F6620544673} -\BKM@entry{id=15,dest={73756273656374696F6E2E312E312E38},srcline={78}}{4469676974616C20666F6F747072696E74696E67} -\BKM@entry{id=16,dest={73656374696F6E2E312E32},srcline={83}}{41626F7574207472616E736372697074696F6E20666163746F7273} -\BKM@entry{id=17,dest={73756273656374696F6E2E312E322E31},srcline={95}}{486F77206368726F6D6174696E20616666656374732054462062696E64696E67} -\BKM@entry{id=18,dest={73756273656374696F6E2E312E322E32},srcline={100}}{4D6F64656C696E672073657175656E6365207370656369666963697479} -\BKM@entry{id=19,dest={73756273656374696F6E2E312E322E33},srcline={103}}{544620636F2D62696E64696E67} -\BKM@entry{id=20,dest={73756273656374696F6E2E312E322E34},srcline={109}}{4D6561737572696E672054462062696E64696E6720696E207669766F} -\BKM@entry{id=21,dest={73756273656374696F6E2E312E322E35},srcline={112}}{4D6561737572696E672054462062696E64696E6720696E20766974726F} -\BKM@entry{id=22,dest={73656374696F6E2E312E33},srcline={117}}{4461746120616E616C79736973} +\BKM@entry{id=9,dest={73756273656374696F6E2E312E312E32},srcline={50}}{546865206368726F6D6174696E2069732064796E616D6963} 
+\BKM@entry{id=10,dest={73756273656374696F6E2E312E312E33},srcline={60}}{4D6561737572696E67206E75636C656F736F6D65206F63637570616E6379} +\BKM@entry{id=11,dest={73756273656374696F6E2E312E312E34},srcline={64}}{41626F7574206E75636C656F736F6D6520706F736974696F6E696E67} +\BKM@entry{id=12,dest={73756273656374696F6E2E312E312E35},srcline={89}}{41626F7574206368726F6D6174696E20646F6D61696E73} +\BKM@entry{id=13,dest={73756273656374696F6E2E312E312E36},srcline={93}}{526567756C61746F727920656C656D656E7473} +\BKM@entry{id=14,dest={73756273656374696F6E2E312E312E37},srcline={96}}{50696F6E656572696E6720666163746F72732C2061207370656369616C20636C617373206F6620544673} +\BKM@entry{id=15,dest={73756273656374696F6E2E312E312E38},srcline={99}}{4469676974616C20666F6F747072696E74696E67} +\BKM@entry{id=16,dest={73656374696F6E2E312E32},srcline={104}}{41626F7574207472616E736372697074696F6E20666163746F7273} +\BKM@entry{id=17,dest={73756273656374696F6E2E312E322E31},srcline={116}}{486F77206368726F6D6174696E20616666656374732054462062696E64696E67} +\BKM@entry{id=18,dest={73756273656374696F6E2E312E322E32},srcline={121}}{4D6F64656C696E672073657175656E6365207370656369666963697479} +\BKM@entry{id=19,dest={73756273656374696F6E2E312E322E33},srcline={124}}{544620636F2D62696E64696E67} +\BKM@entry{id=20,dest={73756273656374696F6E2E312E322E34},srcline={130}}{4D6561737572696E672054462062696E64696E6720696E207669766F} +\BKM@entry{id=21,dest={73756273656374696F6E2E312E322E35},srcline={133}}{4D6561737572696E672054462062696E64696E6720696E20766974726F} +\BKM@entry{id=22,dest={73656374696F6E2E312E33},srcline={138}}{4461746120616E616C79736973} \@input{main/ch_group_projects.aux} \BKM@entry{id=23,dest={636861707465722E32},srcline={2}}{5075626C6973686564206C61626F7261746F72792070726F6A65637473} \BKM@entry{id=24,dest={636861707465722E32},srcline={5}}{5075626C6973686564206C61626F7261746F72792070726F6A65637473} 
\BKM@entry{id=25,dest={73656374696F6E2E322E31},srcline={12}}{4D6173732047656E6F6D6520416E6E6F746174696F6E207265706F7369746F7279} \BKM@entry{id=26,dest={73756273656374696F6E2E322E312E31},srcline={17}}{496E74726F64756374696F6E} \BKM@entry{id=27,dest={73756273656374696F6E2E322E312E32},srcline={23}}{4D474120636F6E74656E7420616E64206F7267616E697A6174696F6E} \pgfsyspdfmark {pgfid16}{0}{40463552} \pgfsyspdfmark {pgfid15}{6}{40511883} \BKM@entry{id=28,dest={73756273656374696F6E2E322E312E33},srcline={58}}{436F6E636C7573696F6E73} \BKM@entry{id=29,dest={73656374696F6E2E322E32},srcline={64}}{45756B6172796F7469632050726F6D6F746572204461746162617365} \BKM@entry{id=30,dest={73756273656374696F6E2E322E322E31},srcline={68}}{496E74726F64756374696F6E} \BKM@entry{id=31,dest={73756273656374696F6E2E322E322E32},srcline={86}}{4550446E6577206E6F7720616E6E6F7461746573205C28736F6D65206F665C2920796F7572206D757368726F6F6D7320616E6420766567657461626C6573} \BKM@entry{id=32,dest={73756273656374696F6E2E322E322E33},srcline={120}}{496E63726561736564206D617070696E6720707265636973696F6E20696E2068756D616E} \BKM@entry{id=33,dest={73756273656374696F6E2E322E322E34},srcline={132}}{496E746567726174696F6E206F66204550446E65772077697468206F74686572207265736F7572636573} \BKM@entry{id=34,dest={73756273656374696F6E2E322E322E35},srcline={138}}{436F6E636C7573696F6E73} \BKM@entry{id=35,dest={73756273656374696F6E2E322E322E36},srcline={142}}{4D6574686F6473} \BKM@entry{id=36,dest={73656374696F6E2E322E33},srcline={150}}{50574D5363616E} \BKM@entry{id=37,dest={73756273656374696F6E2E322E332E31},srcline={164}}{496E74726F64756374696F6E} \BKM@entry{id=38,dest={73756273656374696F6E2E322E332E32},srcline={191}}{4461746120616E64206D6574686F6473} \BKM@entry{id=39,dest={73756273656374696F6E2E322E332E33},srcline={217}}{42656E63686D61726B} \BKM@entry{id=40,dest={73756273656374696F6E2E322E332E34},srcline={287}}{436F6E636C7573696F6E73} \BKM@entry{id=41,dest={73656374696F6E2E322E34},srcline={297}}{535061722D4B} 
\BKM@entry{id=42,dest={73756273656374696F6E2E322E342E31},srcline={305}}{496E74726F64756374696F6E} \BKM@entry{id=43,dest={73756273656374696F6E2E322E342E32},srcline={318}}{4D6574686F6473} \BKM@entry{id=44,dest={73756273656374696F6E2E322E342E33},srcline={349}}{526573756C7473} \BKM@entry{id=45,dest={73756273656374696F6E2E322E342E34},srcline={356}}{436F6E636C7573696F6E} \@input{main/ch_encode_peaks.aux} \BKM@entry{id=46,dest={636861707465722E33},srcline={2}}{454E434F4445207065616B7320616E616C79736973} \BKM@entry{id=47,dest={636861707465722E33},srcline={5}}{454E434F4445207065616B7320616E616C79736973} \BKM@entry{id=48,dest={73656374696F6E2E332E31},srcline={22}}{44617461} \pgfsyspdfmark {pgfid18}{0}{40463552} \pgfsyspdfmark {pgfid17}{6}{40511883} \BKM@entry{id=49,dest={73656374696F6E2E332E32},srcline={45}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206368726F6D6174696E2061726368697465637475726573} \BKM@entry{id=50,dest={73756273656374696F6E2E332E322E31},srcline={64}}{44617461207265616C69676E6D656E74} \BKM@entry{id=51,dest={73656374696F6E2E332E33},srcline={76}}{4E75636C656F736F6D65206F7267616E697A6174696F6E2061726F756E64207472616E736372697074696F6E20666163746F722062696E64696E67207369746573} \BKM@entry{id=52,dest={73656374696F6E2E332E34},srcline={103}}{5468652063617365206F6620435443462C2052414432312C20534D43332C2059593120616E64205A4E46313433} \BKM@entry{id=53,dest={73656374696F6E2E332E35},srcline={141}}{4354434620616E64204A756E4420696E7465726163746F6D6573} \BKM@entry{id=54,dest={73656374696F6E2E332E36},srcline={230}}{454246312062696E6473206E75636C656F736F6D6573} \BKM@entry{id=55,dest={73656374696F6E2E332E37},srcline={267}}{4D6574686F6473} \BKM@entry{id=56,dest={73756273656374696F6E2E332E372E31},srcline={269}}{4461746120616E6420646174612070726F63657373696E67} \BKM@entry{id=57,dest={73756273656374696F6E2E332E372E32},srcline={282}}{436C617373696669636174696F6E206F66204D4E617365207061747465726E73} 
\BKM@entry{id=58,dest={73756273656374696F6E2E332E372E33},srcline={295}}{5175616E74696679696E67206E75636C656F736F6D6520617272617920696E74656E736974792066726F6D20636C617373696669636174696F6E20726573756C7473} \BKM@entry{id=59,dest={73756273656374696F6E2E332E372E34},srcline={324}}{5065616B20636F6C6F63616C697A6174696F6E} \BKM@entry{id=60,dest={73756273656374696F6E2E332E372E35},srcline={328}}{4E445220646574656374696F6E} \BKM@entry{id=61,dest={73756273656374696F6E2E332E372E36},srcline={420}}{4354434620616E64204A756E4420696E7465726163746F7273} \BKM@entry{id=62,dest={73756273656374696F6E2E332E372E37},srcline={432}}{4542463120616E64206E75636C656F736F6D65} \@input{main/ch_smile-seq.aux} \BKM@entry{id=63,dest={636861707465722E34},srcline={2}}{534D694C452D736571206461746120616E616C79736973} \BKM@entry{id=64,dest={636861707465722E34},srcline={5}}{534D694C452D736571206461746120616E616C79736973} \BKM@entry{id=65,dest={73756273656374696F6E2E342E302E31},srcline={19}}{496E74726F64756374696F6E} \pgfsyspdfmark {pgfid20}{0}{40463552} \pgfsyspdfmark {pgfid19}{6}{40511883} \BKM@entry{id=66,dest={73756273656374696F6E2E342E302E32},srcline={36}}{48696464656E204D61726B6F76204D6F64656C204D6F74696620646973636F76657279} \BKM@entry{id=67,dest={73756273656374696F6E2E342E302E33},srcline={61}}{42696E64696E67206D6F746966206576616C756174696F6E} \BKM@entry{id=68,dest={73756273656374696F6E2E342E302E34},srcline={115}}{526573756C7473} \BKM@entry{id=69,dest={73756273656374696F6E2E342E302E35},srcline={133}}{436F6E636C7573696F6E73} \@input{main/ch_atac-seq.aux} \BKM@entry{id=70,dest={636861707465722E35},srcline={2}}{4368726F6D6174696E206163636573736962696C697479206F66206D6F6E6F6379746573} \BKM@entry{id=71,dest={73656374696F6E2E352E31},srcline={16}}{415441432D736571} \pgfsyspdfmark {pgfid22}{0}{40463552} \pgfsyspdfmark {pgfid21}{6}{40511883} \BKM@entry{id=72,dest={73656374696F6E2E352E32},srcline={33}}{4D6F6E69746F72696E672054462062696E64696E67} 
\BKM@entry{id=73,dest={73656374696F6E2E352E33},srcline={42}}{54686520616476656E74206F662073696E676C652063656C6C20444746} \BKM@entry{id=74,dest={73656374696F6E2E352E34},srcline={69}}{4F70656E20697373756573} \BKM@entry{id=75,dest={73656374696F6E2E352E35},srcline={73}}{44617461} \BKM@entry{id=76,dest={73656374696F6E2E352E36},srcline={84}}{4964656E74696679696E67206F7665722D726570726573656E746564207369676E616C73} \BKM@entry{id=77,dest={73756273656374696F6E2E352E362E31},srcline={89}}{43684950506172746974696F6E696E67203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642072656164207061747465726E73} \BKM@entry{id=78,dest={73756273656374696F6E2E352E362E32},srcline={101}}{454D53657175656E6365203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E636573} \BKM@entry{id=79,dest={73756273656374696F6E2E352E362E33},srcline={200}}{454D4A6F696E74203A20616E20616C676F726974686D20746F206964656E74696679206F7665722D726570726573656E7465642073657175656E63657320616E64206368726F6D6174696E2061726368697465637475726573} \BKM@entry{id=80,dest={73756273656374696F6E2E352E362E34},srcline={235}}{44617461207265616C69676E6D656E74} \BKM@entry{id=81,dest={73656374696F6E2E352E37},srcline={249}}{526573756C7473} \BKM@entry{id=82,dest={73756273656374696F6E2E352E372E31},srcline={253}}{467261676D656E742073697A6520616E616C79736973} \BKM@entry{id=83,dest={73756273656374696F6E2E352E372E32},srcline={282}}{4D6561737572696E67206F70656E206368726F6D6174696E20616E64206E75636C656F736F6D65206F63637570616E6379} \BKM@entry{id=84,dest={73756273656374696F6E2E352E372E33},srcline={312}}{4576616C756174696F6E206F6620454D53657175656E636520616E642043684950506172746974696F6E696E67} \BKM@entry{id=85,dest={73656374696F6E2E352E38},srcline={382}}{416C69676E696E67207468652062696E64696E67207369746573} \BKM@entry{id=86,dest={73656374696F6E2E352E39},srcline={407}}{4578706C6F72696E6720696E646976696475616C20544620636C6173736573} 
\BKM@entry{id=87,dest={73656374696F6E2E352E3130},srcline={423}}{44697363757373696F6E73} \BKM@entry{id=88,dest={73656374696F6E2E352E3131},srcline={433}}{506572737065637469766573} \BKM@entry{id=89,dest={73656374696F6E2E352E3132},srcline={443}}{4D6574686F6473} \BKM@entry{id=90,dest={73756273656374696F6E2E352E31322E31},srcline={445}}{506172746974696F6E696E672070726F6772616D73} \BKM@entry{id=91,dest={73756273656374696F6E2E352E31322E32},srcline={460}}{467261676D656E7420636C6173736573} \BKM@entry{id=92,dest={73756273656374696F6E2E352E31322E33},srcline={477}}{53696D756C617465642073657175656E636573} \BKM@entry{id=93,dest={73756273656374696F6E2E352E31322E34},srcline={481}}{5265616C69676E6D656E74207573696E67204A4153504152206D6F74696673} \BKM@entry{id=94,dest={73756273656374696F6E2E352E31322E35},srcline={526}}{4D6F64656C20657874656E73696F6E} \BKM@entry{id=95,dest={73756273656374696F6E2E352E31322E36},srcline={536}}{45787472616374696E6720646174612061737369676E656420746F206120636C617373} \BKM@entry{id=96,dest={73756273656374696F6E2E352E31322E37},srcline={603}}{5065616B2070726F63657373696E67} \BKM@entry{id=97,dest={73756273656374696F6E2E352E31322E38},srcline={612}}{50657220544620636C6173736573} \BKM@entry{id=98,dest={73756273656374696F6E2E352E31322E39},srcline={621}}{506572205446207375622D636C6173736573} \@input{tail/appendix.aux} \BKM@entry{id=99,dest={617070656E6469782E41},srcline={5}}{416E20617070656E646978} \BKM@entry{id=100,dest={73656374696F6E2E412E31},srcline={7}}{537570706C656D656E746172792066696775726573} \@writefile{toc}{\vspace {\normalbaselineskip }} \pgfsyspdfmark {pgfid24}{0}{40463552} \pgfsyspdfmark {pgfid23}{6}{40511883} \@input{tail/biblio.aux} -\BKM@entry{id=101,dest={73656374696F6E2A2E3632},srcline={3}}{4269626C696F677261706879} +\BKM@entry{id=101,dest={73656374696F6E2A2E3633},srcline={3}}{4269626C696F677261706879} \pgfsyspdfmark {pgfid26}{0}{40463552} \pgfsyspdfmark {pgfid25}{6}{40498788} 
-\BKM@entry{id=102,dest={617070656E6469782A2E3633},srcline={6}}{4269626C696F677261706879} +\BKM@entry{id=102,dest={617070656E6469782A2E3634},srcline={6}}{4269626C696F677261706879} \@input{tail/cv.aux} -\BKM@entry{id=103,dest={73656374696F6E2A2E3634},srcline={4}}{437572726963756C756D205669746165} +\BKM@entry{id=103,dest={73656374696F6E2A2E3635},srcline={4}}{437572726963756C756D205669746165} diff --git a/my_thesis.bbl b/my_thesis.bbl index 0ab6081..da90dbe 100644 --- a/my_thesis.bbl +++ b/my_thesis.bbl @@ -1,716 +1,750 @@ \begin{thebibliography}{} \bibitem[Adey et~al., 2010]{adey_rapid_2010} Adey, A., Morrison, H.~G., {Asan}, Xun, X., Kitzman, J.~O., Turner, E.~H., Stackhouse, B., MacKenzie, A.~P., Caruccio, N.~C., Zhang, X., and Shendure, J. (2010). \newblock Rapid, low-input, low-bias construction of shotgun fragment libraries by high-density in vitro transposition. \newblock {\em Genome Biology}, 11(12):R119. \bibitem[Aerts et~al., 2003]{aerts_toucan:_2003} Aerts, S., Thijs, G., Coessens, B., Staes, M., Moreau, Y., and Moor, B.~D. (2003). \newblock Toucan: deciphering the cis ‐regulatory logic of coregulated genes. \newblock {\em Nucleic Acids Research}, 31(6):1753--1764. \bibitem[Aibar et~al., 2017]{aibar_scenic:_2017} Aibar, S., González-Blas, C.~B., Moerman, T., Huynh-Thu, V.~A., Imrichova, H., Hulselmans, G., Rambow, F., Marine, J.-C., Geurts, P., Aerts, J., van~den Oord, J., Atak, Z.~K., Wouters, J., and Aerts, S. (2017). \newblock {SCENIC}: single-cell regulatory network inference and clustering. \newblock {\em Nature Methods}, 14(11):1083--1086. \bibitem[Ambrosini et~al., 2016a]{ambrosini_chip-seq_2016} Ambrosini, G., Dreos, R., Kumar, S., and Bucher, P. (2016a). \newblock The {ChIP}-{Seq} tools and web server: a resource for analyzing {ChIP}-seq and other types of genomic data. \newblock {\em BMC Genomics}, 17:938. \bibitem[Ambrosini et~al., 2016b]{ambrosini_chip-seq_2016-1} Ambrosini, G., Dreos, R., Kumar, S., and Bucher, P. (2016b). 
\newblock The {ChIP}-{Seq} tools and web server: a resource for analyzing {ChIP}-seq and other types of genomic data. \newblock {\em BMC Genomics}, 17(1):938. \bibitem[Ambrosini et~al., 2018]{ambrosini_pwmscan:_2018} Ambrosini, G., Groux, R., and Bucher, P. (2018). \newblock {PWMScan}: a fast tool for scanning entire genomes with a position-specific weight matrix. \newblock {\em Bioinformatics}, 34(14):2483--2484. \bibitem[Ambrosini et~al., 2003]{ambrosini_signal_2003} Ambrosini, G., Praz, V., Jagannathan, V., and Bucher, P. (2003). \newblock Signal search analysis server. \newblock {\em Nucleic Acids Research}, 31(13):3618--3620. \bibitem[Angerer et~al., 2017]{angerer_single_2017} Angerer, P., Simon, L., Tritschler, S., Wolf, F.~A., Fischer, D., and Theis, F.~J. (2017). \newblock Single cells make big data: {New} challenges and opportunities in transcriptomics. \newblock {\em Current Opinion in Systems Biology}, 4:85--91. \bibitem[Bailey et~al., 2015]{bailey_znf143_2015} Bailey, S.~D., Zhang, X., Desai, K., Aid, M., Corradin, O., Cowper-Sal·lari, R., Akhtar-Zaidi, B., Scacheri, P.~C., Haibe-Kains, B., and Lupien, M. (2015). \newblock {ZNF}143 provides sequence specificity to secure chromatin interactions at gene promoters. \newblock {\em Nature Communications}, 2:6186. \bibitem[Bailey et~al., 2009]{bailey_meme_2009} Bailey, T.~L., Boden, M., Buske, F.~A., Frith, M., Grant, C.~E., Clementi, L., Ren, J., Li, W.~W., and Noble, W.~S. (2009). \newblock {MEME} {Suite}: tools for motif discovery and searching. \newblock {\em Nucleic Acids Research}, 37(suppl\_2):W202--W208. \bibitem[Barrett et~al., 2011]{barrett_ncbi_2011} Barrett, T., Troup, D.~B., Wilhite, S.~E., Ledoux, P., Evangelista, C., Kim, I.~F., Tomashevsky, M., Marshall, K.~A., Phillippy, K.~H., Sherman, P.~M., Muertter, R.~N., Holko, M., Ayanbule, O., Yefanov, A., and Soboleva, A. (2011). \newblock {NCBI} {GEO}: archive for functional genomics data sets—10 years on. 
\newblock {\em Nucleic Acids Research}, 39(suppl\_1):D1005--D1010. \bibitem[Barski et~al., 2007]{barski_high-resolution_2007} Barski, A., Cuddapah, S., Cui, K., Roh, T.-Y., Schones, D.~E., Wang, Z., Wei, G., Chepelev, I., and Zhao, K. (2007). \newblock High-{Resolution} {Profiling} of {Histone} {Methylations} in the {Human} {Genome}. \newblock {\em Cell}, 129(4):823--837. \bibitem[Beckstette et~al., 2006]{beckstette_fast_2006} Beckstette, M., Homann, R., Giegerich, R., and Kurtz, S. (2006). \newblock Fast index based algorithms and software for matching position specific scoring matrices. \newblock {\em BMC Bioinformatics}, 7:389. \bibitem[Berest et~al., 2018]{berest_quantification_2018} Berest, I., Arnold, C., Reyes-Palomares, A., Palla, G., Rasmussen, K.~D., Helin, K., and Zaugg, J. (2018). \newblock Quantification of differential transcription factor activity and multiomics-based classification into activators and repressors: {diffTF}. \newblock {\em bioRxiv}. \bibitem[Berger and Bulyk, 2009]{berger_universal_2009} Berger, M.~F. and Bulyk, M.~L. (2009). \newblock Universal protein-binding microarrays for the comprehensive characterization of the {DNA}-binding specificities of transcription factors. \newblock {\em Nature Protocols}, 4(3):393--411. \bibitem[Boller et~al., 2018]{boller_defining_2018} Boller, S., Li, R., and Grosschedl, R. (2018). \newblock Defining {B} {Cell} {Chromatin}: {Lessons} from {EBF}1. \newblock {\em Trends in Genetics}, 34(4):257--269. \bibitem[Boller et~al., 2016]{boller_pioneering_2016} Boller, S., Ramamoorthy, S., Akbas, D., Nechanitzky, R., Burger, L., Murr, R., Schübeler, D., and Grosschedl, R. (2016). \newblock Pioneering {Activity} of the {C}-{Terminal} {Domain} of {EBF}1 {Shapes} the {Chromatin} {Landscape} for {B} {Cell} {Programming}. \newblock {\em Immunity}, 44(3):527--541. 
\bibitem[Boyle et~al., 2008]{boyle_high-resolution_2008} Boyle, A.~P., Davis, S., Shulha, H.~P., Meltzer, P., Margulies, E.~H., Weng, Z., Furey, T.~S., and Crawford, G.~E. (2008). \newblock High-{Resolution} {Mapping} and {Characterization} of {Open} {Chromatin} across the {Genome}. \newblock {\em Cell}, 132(2):311--322. \bibitem[Bucher and Trifonov, 1986]{bucher_compilation_1986} Bucher, P. and Trifonov, E.~N. (1986). \newblock Compilation and analysis of eukaryotic {POL} {II} promoter sequences. \newblock {\em Nucleic Acids Research}, 14(24):10009--10026. \bibitem[Buenrostro et~al., 2013]{buenrostro_transposition_2013} Buenrostro, J.~D., Giresi, P.~G., Zaba, L.~C., Chang, H.~Y., and Greenleaf, W.~J. (2013). \newblock Transposition of native chromatin for fast and sensitive epigenomic profiling of open chromatin, {DNA}-binding proteins and nucleosome position. \newblock {\em Nature Methods}, 10(12):1213--1218. \bibitem[Castro-Mondragon et~al., 2017]{castro-mondragon_rsat_2017} Castro-Mondragon, J.~A., Jaeger, S., Thieffry, D., Thomas-Chollier, M., and van Helden, J. (2017). \newblock {RSAT} matrix-clustering: dynamic exploration and redundancy reduction of transcription factor binding motif collections. \newblock {\em Nucleic Acids Research}, 45(13):e119--e119. \bibitem[Chatr-aryamontri et~al., 2017]{chatr-aryamontri_biogrid_2017} Chatr-aryamontri, A., Oughtred, R., Boucher, L., Rust, J., Chang, C., Kolas, N.~K., O'Donnell, L., Oster, S., Theesfeld, C., Sellam, A., Stark, C., Breitkreutz, B.-J., Dolinski, K., and Tyers, M. (2017). \newblock The {BioGRID} interaction database: 2017 update. \newblock {\em Nucleic Acids Research}, 45(D1):D369--D379. \bibitem[Cheng et~al., 2012]{cheng_understanding_2012} Cheng, C., Alexander, R., Min, R., Leng, J., Yip, K.~Y., Rozowsky, J., Yan, K.-K., Dong, X., Djebali, S., Ruan, Y., Davis, C.~A., Carninci, P., Lassman, T., Gingeras, T.~R., Guigó, R., Birney, E., Weng, Z., Snyder, M., and Gerstein, M. (2012). 
\newblock Understanding transcriptional regulation by integrative analysis of transcription factor binding data. \newblock {\em Genome Research}, 22(9):1658--1667. \bibitem[Cirillo et~al., 2002]{cirillo_opening_2002} Cirillo, L.~A., Lin, F.~R., Cuesta, I., Friedman, D., Jarnik, M., and Zaret, K.~S. (2002). \newblock Opening of {Compacted} {Chromatin} by {Early} {Developmental} {Transcription} {Factors} {HNF}3 ({FoxA}) and {GATA}-4. \newblock {\em Molecular Cell}, 9(2):279--289. \bibitem[Consortium, 2012]{consortium_integrated_2012} Consortium, T. E.~P. (2012). \newblock An integrated encyclopedia of {DNA} elements in the human genome. \newblock {\em Nature}, 489(7414):57--74. \bibitem[Dalton et~al., 2009]{dalton_clustering_2009} Dalton, L., Ballarin, V., and Brun, M. (2009). \newblock Clustering {Algorithms}: {On} {Learning}, {Validation}, {Performance}, and {Applications} to {Genomics}. \newblock {\em Current Genomics}, 10(6):430--445. \bibitem[Donohoe et~al., 2007]{donohoe_identification_2007} Donohoe, M.~E., Zhang, L.-F., Xu, N., Shi, Y., and Lee, J.~T. (2007). \newblock Identification of a {Ctcf} {Cofactor}, {Yy}1, for the {X} {Chromosome} {Binary} {Switch}. \newblock {\em Molecular Cell}, 25(1):43--56. +\bibitem[Dreos et~al., 2016]{dreos_influence_2016} +Dreos, R., Ambrosini, G., and Bucher, P. (2016). +\newblock Influence of {Rotational} {Nucleosome} {Positioning} on + {Transcription} {Start} {Site} {Selection} in {Animal} {Promoters}. +\newblock {\em PLOS Computational Biology}, 12(10):e1005144. + \bibitem[Dreos et~al., 2013]{dreos_epd_2013} Dreos, R., Ambrosini, G., Cavin~Périer, R., and Bucher, P. (2013). \newblock {EPD} and {EPDnew}, high-quality promoter resources in the next-generation sequencing era. \newblock {\em Nucleic Acids Research}, 41(D1):D157--D164. \bibitem[Dreos et~al., 2017]{dreos_eukaryotic_2017} Dreos, R., Ambrosini, G., Groux, R., Cavin Périer, R., and Bucher, P. (2017). 
\newblock The eukaryotic promoter database in its 30th year: focus on non-vertebrate organisms. \newblock {\em Nucleic Acids Research}, 45(D1):D51--D55. \bibitem[Dreos et~al., 2018]{dreos_mga_2018} Dreos, R., Ambrosini, G., Groux, R., Périer, R.~C., and Bucher, P. (2018). \newblock {MGA} repository: a curated data resource for {ChIP}-seq and other genome annotated data. \newblock {\em Nucleic Acids Research}, 46(D1):D175--D180. \bibitem[Dreos et~al., 2015]{dreos_eukaryotic_2015} Dreos, R., Ambrosini, G., Périer, R.~C., and Bucher, P. (2015). \newblock The {Eukaryotic} {Promoter} {Database}: expansion of {EPDnew} and new promoter analysis tools. \newblock {\em Nucleic Acids Research}, 43(D1):D92--D96. \bibitem[Fan et~al., 2016]{fan_characterizing_2016} Fan, J., Salathia, N., Liu, R., Kaeser, G.~E., Yung, Y.~C., Herman, J.~L., Kaper, F., Fan, J.-B., Zhang, K., Chun, J., and Kharchenko, P.~V. (2016). \newblock Characterizing transcriptional heterogeneity through pathway and gene set overdispersion analysis. \newblock {\em Nature Methods}, 13(3):241--244. \bibitem[Fu et~al., 2004]{fu_motifviz:_2004} Fu, Y., Frith, M.~C., Haverty, P.~M., and Weng, Z. (2004). \newblock {MotifViz}: an analysis and visualization tool for motif discovery. \newblock {\em Nucleic Acids Research}, 32(suppl\_2):W420--W423. \bibitem[Fu et~al., 2008]{fu_insulator_2008} Fu, Y., Sinha, M., Peterson, C.~L., and Weng, Z. (2008). \newblock The {Insulator} {Binding} {Protein} {CTCF} {Positions} 20 {Nucleosomes} around {Its} {Binding} {Sites} across the {Human} {Genome}. \newblock {\em PLOS Genetics}, 4(7):e1000138. \bibitem[Gaffney et~al., 2012]{gaffney_controls_2012} Gaffney, D.~J., McVicker, G., Pai, A.~A., Fondufe-Mittendorf, Y.~N., Lewellen, N., Michelini, K., Widom, J., Gilad, Y., and Pritchard, J.~K. (2012). \newblock Controls of {Nucleosome} {Positioning} in the {Human} {Genome}. \newblock {\em PLoS Genet}, 8(11):e1003036. 
\bibitem[Gerstein et~al., 2012]{gerstein_architecture_2012} Gerstein, M.~B., Kundaje, A., Hariharan, M., Landt, S.~G., Yan, K.-K., Cheng, C., Mu, X.~J., Khurana, E., Rozowsky, J., Alexander, R., Min, R., Alves, P., Abyzov, A., Addleman, N., Bhardwaj, N., Boyle, A.~P., Cayting, P., Charos, A., Chen, D.~Z., Cheng, Y., Clarke, D., Eastman, C., Euskirchen, G., Frietze, S., Fu, Y., Gertz, J., Grubert, F., Harmanci, A., Jain, P., Kasowski, M., Lacroute, P., Leng, J., Lian, J., Monahan, H., O’Geen, H., Ouyang, Z., Partridge, E.~C., Patacsil, D., Pauli, F., Raha, D., Ramirez, L., Reddy, T.~E., Reed, B., Shi, M., Slifer, T., Wang, J., Wu, L., Yang, X., Yip, K.~Y., Zilberman-Schapira, G., Batzoglou, S., Sidow, A., Farnham, P.~J., Myers, R.~M., Weissman, S.~M., and Snyder, M. (2012). \newblock Architecture of the human regulatory network derived from {ENCODE} data. \newblock {\em Nature}, 489(7414):91--100. \bibitem[Ghirlando and Felsenfeld, 2016]{ghirlando_ctcf:_2016} Ghirlando, R. and Felsenfeld, G. (2016). \newblock {CTCF}: making the right connections. \newblock {\em Genes \& Development}, 30(8):881--891. \bibitem[González-Blas et~al., 2019]{gonzalez-blas_cistopic:_2019} González-Blas, C.~B., Minnoye, L., Papasokrati, D., Aibar, S., Hulselmans, G., Christiaens, V., Davie, K., Wouters, J., and Aerts, S. (2019). \newblock {cisTopic}: cis-regulatory topic modeling on single-cell {ATAC}-seq data. \newblock {\em Nature Methods}, 16(5):397. \bibitem[Grant et~al., 2011]{grant_fimo:_2011} Grant, C.~E., Bailey, T.~L., and Noble, W.~S. (2011). \newblock {FIMO}: scanning for occurrences of a given motif. \newblock {\em Bioinformatics}, 27(7):1017--1018. \bibitem[Grossman et~al., 2018]{grossman_positional_2018} Grossman, S.~R., Engreitz, J., Ray, J.~P., Nguyen, T.~H., Hacohen, N., and Lander, E.~S. (2018). \newblock Positional specificity of different transcription factor classes within enhancers. \newblock {\em Proceedings of the National Academy of Sciences}, 115(30):E7222--E7230. 
\bibitem[Groux and Bucher, 2019]{groux_spar-k:_2019} Groux, R. and Bucher, P. (2019). \newblock {SPar}-{K}: a method to partition {NGS} signal data. \newblock {\em Bioinformatics}. \bibitem[Guo et~al., 2012]{guo_high_2012} Guo, Y., Mahony, S., and Gifford, D.~K. (2012). \newblock High {Resolution} {Genome} {Wide} {Binding} {Event} {Finding} and {Motif} {Discovery} {Reveals} {Transcription} {Factor} {Spatial} {Binding} {Constraints}. \newblock {\em PLOS Computational Biology}, 8(8):e1002638. \bibitem[Hagman and Lukin, 2005]{hagman_early_2005} Hagman, J. and Lukin, K. (2005). \newblock Early {B}-cell factor ‘pioneers’ the way for {B}-cell development. \newblock {\em Trends in Immunology}, 26(9):455--461. \bibitem[Heinz et~al., 2010]{heinz_simple_2010} Heinz, S., Benner, C., Spann, N., Bertolino, E., Lin, Y.~C., Laslo, P., Cheng, J.~X., Murre, C., Singh, H., and Glass, C.~K. (2010). \newblock Simple {Combinations} of {Lineage}-{Determining} {Transcription} {Factors} {Prime} cis-{Regulatory} {Elements} {Required} for {Macrophage} and {B} {Cell} {Identities}. \newblock {\em Molecular Cell}, 38(4):576--589. \bibitem[Henikoff and Smith, 2015]{henikoff_histone_2015} Henikoff, S. and Smith, M.~M. (2015). \newblock Histone {Variants} and {Epigenetics}. \newblock {\em Cold Spring Harbor Perspectives in Biology}, 7(1):a019364. \bibitem[Hertz et~al., 1990]{hertz_identification_1990} Hertz, G.~Z., Hartzell, G.~W., and Stormo, G.~D. (1990). \newblock Identification of consensus patterns in unaligned {DNA} sequences known to be functionally related. \newblock {\em Computer applications in the biosciences: CABIOS}, 6(2):81--92. \bibitem[Hon et~al., 2008]{hon_chromasig:_2008} Hon, G., Ren, B., and Wang, W. (2008). \newblock {ChromaSig}: {A} {Probabilistic} {Approach} to {Finding} {Common} {Chromatin} {Signatures} in the {Human} {Genome}. \newblock {\em PLOS Computational Biology}, 4(10):e1000201. 
\bibitem[Hyun et~al., 2017]{hyun_writing_2017} Hyun, K., Jeon, J., Park, K., and Kim, J. (2017). \newblock Writing, erasing and reading histone lysine methylations. \newblock {\em Experimental \& Molecular Medicine}, 49(4):e324--e324. \bibitem[Ioshikhes et~al., 2011]{ioshikhes_variety_2011} Ioshikhes, I., Hosid, S., and Pugh, B.~F. (2011). \newblock Variety of genomic {DNA} patterns for nucleosome positioning. \newblock {\em Genome Research}, 21(11):1863--1871. \bibitem[Isakova et~al., 2017]{isakova_smile-seq_2017} Isakova, A., Groux, R., Imbeault, M., Rainer, P., Alpern, D., Dainese, R., Ambrosini, G., Trono, D., Bucher, P., and Deplancke, B. (2017). \newblock {SMiLE}-seq identifies binding motifs of single and dimeric transcription factors. \newblock {\em Nature Methods}, advance online publication. +\bibitem[Jiang and Pugh, 2009]{jiang_nucleosome_2009} +Jiang, C. and Pugh, B.~F. (2009). +\newblock Nucleosome positioning and gene regulation: advances through + genomics. +\newblock {\em Nature Reviews Genetics}, 10(3):161--172. + \bibitem[Jolma et~al., 2010]{jolma_multiplexed_2010} Jolma, A., Kivioja, T., Toivonen, J., Cheng, L., Wei, G., Enge, M., Taipale, M., Vaquerizas, J.~M., Yan, J., Sillanpää, M.~J., Bonke, M., Palin, K., Talukder, S., Hughes, T.~R., Luscombe, N.~M., Ukkonen, E., and Taipale, J. (2010). \newblock Multiplexed massively parallel {SELEX} for characterization of human transcription factor binding specificities. \newblock {\em Genome Research}, 20(6):861--873. \bibitem[Jolma and Taipale, 2011]{jolma_methods_2011-2} Jolma, A. and Taipale, J. (2011). \newblock Methods for {Analysis} of {Transcription} {Factor} {DNA}-{Binding} {Specificity} {In} {Vitro}, {Chapter} 9, {How} {Transcription} {Factors} {Identify} {Regulatory} {Sites} in {Genomic} {Sequence}. \newblock In Hughes, T.~R., editor, {\em A {Handbook} of {Transcription} {Factors}}, number~52 in Subcellular {Biochemistry}, pages 193--204. Springer Netherlands. 
\bibitem[Jolma et~al., 2013]{jolma_dna-binding_2013} Jolma, A., Yan, J., Whitington, T., Toivonen, J., Nitta, K., Rastas, P., Morgunova, E., Enge, M., Taipale, M., Wei, G., Palin, K., Vaquerizas, J., Vincentelli, R., Luscombe, N., Hughes, T., Lemaire, P., Ukkonen, E., Kivioja, T., and Taipale, J. (2013). \newblock {DNA}-{Binding} {Specificities} of {Human} {Transcription} {Factors}. \newblock {\em Cell}, 152(1–2):327--339. \bibitem[Kent, 2002]{kent_blatblast-like_2002} Kent, W.~J. (2002). \newblock {BLAT}—{The} {BLAST}-{Like} {Alignment} {Tool}. \newblock {\em Genome Research}, 12(4):656--664. \bibitem[Khan et~al., 2018]{khan_jaspar_2018} Khan, A., Fornes, O., Stigliani, A., Gheorghe, M., Castro-Mondragon, J.~A., van der Lee, R., Bessy, A., Chèneby, J., Kulkarni, S.~R., Tan, G., Baranasic, D., Arenillas, D.~J., Sandelin, A., Vandepoele, K., Lenhard, B., Ballester, B., Wasserman, W.~W., Parcy, F., and Mathelier, A. (2018). \newblock {JASPAR} 2018: update of the open-access database of transcription factor binding profiles and its web framework. \newblock {\em Nucleic Acids Research}, 46(D1):D260--D266. \bibitem[Kiselev et~al., 2017]{kiselev_sc3:_2017} Kiselev, V.~Y., Kirschner, K., Schaub, M.~T., Andrews, T., Yiu, A., Chandra, T., Natarajan, K.~N., Reik, W., Barahona, M., Green, A.~R., and Hemberg, M. (2017). \newblock {SC}3: consensus clustering of single-cell {RNA}-seq data. \newblock {\em Nature Methods}, 14(5):483--486. \bibitem[Kouzarides, 2007]{kouzarides_chromatin_2007} Kouzarides, T. (2007). \newblock Chromatin {Modifications} and {Their} {Function}. \newblock {\em Cell}, 128(4):693--705. +\bibitem[Kubik et~al., 2015]{kubik_nucleosome_2015} +Kubik, S., Bruzzone, M., Jacquet, P., Falcone, J.-L., Rougemont, J., and Shore, + D. (2015). +\newblock Nucleosome {Stability} {Distinguishes} {Two} {Different} {Promoter} + {Types} at {All} {Protein}-{Coding} {Genes} in {Yeast}. +\newblock {\em Molecular Cell}, 60(3):422--434. 
+ \bibitem[Kulakovskiy et~al., 2018]{kulakovskiy_hocomoco:_2018} Kulakovskiy, I.~V., Vorontsov, I.~E., Yevshin, I.~S., Sharipov, R.~N., Fedorova, A.~D., Rumynskiy, E.~I., Medvedeva, Y.~A., Magana-Mora, A., Bajic, V.~B., Papatsenko, D.~A., Kolpakov, F.~A., and Makeev, V.~J. (2018). \newblock {HOCOMOCO}: towards a complete collection of transcription factor binding models for human and mouse via large-scale {ChIP}-{Seq} analysis. \newblock {\em Nucleic Acids Research}, 46(D1):D252--D259. \bibitem[Kulakovskiy et~al., 2016]{kulakovskiy_hocomoco:_2016} Kulakovskiy, I.~V., Vorontsov, I.~E., Yevshin, I.~S., Soboleva, A.~V., Kasianov, A.~S., Ashoor, H., Ba-alawi, W., Bajic, V.~B., Medvedeva, Y.~A., Kolpakov, F.~A., and Makeev, V.~J. (2016). \newblock {HOCOMOCO}: expansion and enhancement of the collection of transcription factor binding sites models. \newblock {\em Nucleic Acids Research}, 44(D1):D116--D125. \bibitem[Kundaje et~al., 2012]{kundaje_ubiquitous_2012} Kundaje, A., Kyriazopoulou-Panagiotopoulou, S., Libbrecht, M., Smith, C.~L., Raha, D., Winters, E.~E., Johnson, S.~M., Snyder, M., Batzoglou, S., and Sidow, A. (2012). \newblock Ubiquitous heterogeneity and asymmetry of the chromatin environment at regulatory elements. \newblock {\em Genome Research}, 22(9):1735--1747. \bibitem[Kurotaki et~al., 2017]{kurotaki_transcriptional_2017} Kurotaki, D., Sasaki, H., and Tamura, T. (2017). \newblock Transcriptional control of monocyte and macrophage development. \newblock {\em International Immunology}, 29(3):97--107. \bibitem[Langmead and Salzberg, 2012]{langmead_fast_2012} Langmead, B. and Salzberg, S.~L. (2012). \newblock Fast gapped-read alignment with {Bowtie} 2. \newblock {\em Nature Methods}, 9(4):357--359. \bibitem[Langmead et~al., 2009]{langmead_ultrafast_2009} Langmead, B., Trapnell, C., Pop, M., and Salzberg, S.~L. (2009). \newblock Ultrafast and memory-efficient alignment of short {DNA} sequences to the human genome. \newblock {\em Genome Biology}, 10(3):R25. 
\bibitem[Li et~al., 2009]{li_sequence_2009} Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., Marth, G., Abecasis, G., and Durbin, R. (2009). \newblock The {Sequence} {Alignment}/{Map} format and {SAMtools}. \newblock {\em Bioinformatics}, 25(16):2078--2079. \bibitem[Li et~al., 2019]{li_identification_2019} Li, Z., Schulz, M.~H., Look, T., Begemann, M., Zenke, M., and Costa, I.~G. (2019). \newblock Identification of transcription factor binding sites using {ATAC}-seq. \newblock {\em Genome Biology}, 20(1):45. \bibitem[Lizio et~al., 2015]{lizio_gateways_2015} Lizio, M., Harshbarger, J., Shimoji, H., Severin, J., Kasukawa, T., Sahin, S., Abugessaisa, I., Fukuda, S., Hori, F., Ishikawa-Kato, S., Mungall, C.~J., Arner, E., Baillie, J.~K., Bertin, N., Bono, H., de~Hoon, M., Diehl, A.~D., Dimont, E., Freeman, T.~C., Fujieda, K., Hide, W., Kaliyaperumal, R., Katayama, T., Lassmann, T., Meehan, T.~F., Nishikata, K., Ono, H., Rehli, M., Sandelin, A., Schultes, E.~A., ‘t Hoen, P.~A., Tatum, Z., Thompson, M., Toyoda, T., Wright, D.~W., Daub, C.~O., Itoh, M., Carninci, P., Hayashizaki, Y., Forrest, A.~R., Kawaji, H., and {the FANTOM consortium} (2015). \newblock Gateways to the {FANTOM}5 promoter level mammalian expression atlas. \newblock {\em Genome Biology}, 16(1):22. \bibitem[Losada, 2014]{losada_cohesin_2014} Losada, A. (2014). \newblock Cohesin in cancer: chromosome segregation and beyond. \newblock {\em Nature Reviews Cancer}, 14(6):389--393. \bibitem[Längst and Manelyte, 2015]{langst_chromatin_2015} Längst, G. and Manelyte, L. (2015). \newblock Chromatin {Remodelers}: {From} {Function} to {Dysfunction}. \newblock {\em Genes}, 6(2):299--324. \bibitem[Maerkl and Quake, 2007]{maerkl_systems_2007} Maerkl, S.~J. and Quake, S.~R. (2007). \newblock A {Systems} {Approach} to {Measuring} the {Binding} {Energy} {Landscapes} of {Transcription} {Factors}. \newblock {\em Science}, 315(5809):233--237. 
\bibitem[Maier et~al., 2004]{maier_early_2004} Maier, H., Ostraat, R., Gao, H., Fields, S., Shinton, S.~A., Medina, K.~L., Ikawa, T., Murre, C., Singh, H., Hardy, R.~R., and Hagman, J. (2004). \newblock Early {B} cell factor cooperates with {Runx}1 and mediates epigenetic changes associated with mb-1 transcription. \newblock {\em Nature Immunology}, 5(10):1069--1077. \bibitem[Marsland, 2015]{marsland_machine_2015-1} Marsland, S. (2015). \newblock {\em Machine {Learning}, {An} algorithmic {Perspective}, {Chapter} 7 {Probabilistic} {Learning}}. \newblock CRC Press, Boca Raton, second edition edition. \bibitem[Mathelier et~al., 2014]{mathelier_jaspar_2014} Mathelier, A., Zhao, X., Zhang, A.~W., Parcy, F., Worsley-Hunt, R., Arenillas, D.~J., Buchman, S., Chen, C.-y., Chou, A., Ienasescu, H., Lim, J., Shyr, C., Tan, G., Zhou, M., Lenhard, B., Sandelin, A., and Wasserman, W.~W. (2014). \newblock {JASPAR} 2014: an extensively expanded and updated open-access database of transcription factor binding profiles. \newblock {\em Nucleic Acids Research}, 42(D1):D142--D147. \bibitem[{McGinty Robert K. and Tan Song}, 2014]{mcginty_robert_k._and_tan_song_fundamentals_2014} {McGinty Robert K. and Tan Song} (2014). \newblock {\em Fundamentals of {Chromatin}, chapter 1 {Histone}, {Nucleosomes} and {Chromatin} {Structure}}. \newblock Jerry L. Workman and Susan M. Abmayr, New York, 2014 edition. \bibitem[Nair et~al., 2014]{nair_probabilistic_2014} Nair, N.~U., Kumar, S., Moret, B. M.~E., and Bucher, P. (2014). \newblock Probabilistic partitioning methods to find significant patterns in {ChIP}-{Seq} data. \newblock {\em Bioinformatics}, 30(17):2406--2413. 
\bibitem[Neph et~al., 2012]{neph_expansive_2012} Neph, S., Vierstra, J., Stergachis, A.~B., Reynolds, A.~P., Haugen, E., Vernot, B., Thurman, R.~E., John, S., Sandstrom, R., Johnson, A.~K., Maurano, M.~T., Humbert, R., Rynes, E., Wang, H., Vong, S., Lee, K., Bates, D., Diegel, M., Roach, V., Dunn, D., Neri, J., Schafer, A., Hansen, R.~S., Kutyavin, T., Giste, E., Weaver, M., Canfield, T., Sabo, P., Zhang, M., Balasundaram, G., Byron, R., MacCoss, M.~J., Akey, J.~M., Bender, M.~A., Groudine, M., Kaul, R., and Stamatoyannopoulos, J.~A. (2012). \newblock An expansive human regulatory lexicon encoded in transcription factor footprints. \newblock {\em Nature}, 489(7414):83--90. \bibitem[Nielsen et~al., 2012]{nielsen_catchprofiles:_2012} Nielsen, F. G.~G., Markus, K.~G., Friborg, R.~M., Favrholdt, L.~M., Stunnenberg, H.~G., and Huynen, M. (2012). \newblock {CATCHprofiles}: {Clustering} and {Alignment} {Tool} for {ChIP} {Profiles}. \newblock {\em PLOS ONE}, 7(1):e28272. \bibitem[Ong and Corces, 2014]{ong_ctcf:_2014} Ong, C.-T. and Corces, V.~G. (2014). \newblock {CTCF}: an architectural protein bridging genome topology and function. \newblock {\em Nature Reviews Genetics}, 15(4):234--246. \bibitem[Orenstein and Shamir, 2014]{orenstein_comparative_2014} Orenstein, Y. and Shamir, R. (2014). \newblock A comparative analysis of transcription factor binding models learned from {PBM}, {HT}-{SELEX} and {ChIP} data. \newblock {\em Nucleic Acids Research}, 42(8):e63--e63. \bibitem[Ou et~al., 2018]{ou_motifstack_2018} Ou, J., Wolfe, S.~A., Brodsky, M.~H., and Zhu, L.~J. (2018). \newblock {motifStack} for the analysis of transcription factor binding site evolution. \newblock {\em Nature Methods}, 15(1):8--9. \bibitem[Pizzi and Ukkonen, 2008]{pizzi_fast_2008} Pizzi, C. and Ukkonen, E. (2008). \newblock Fast profile matching algorithms — {A} survey. \newblock {\em Theoretical Computer Science}, 395(2):137--157. 
\bibitem[Pollard et~al., 2010]{pollard_detection_2010} Pollard, K.~S., Hubisz, M.~J., Rosenbloom, K.~R., and Siepel, A. (2010). \newblock Detection of nonneutral substitution rates on mammalian phylogenies. \newblock {\em Genome Research}, 20(1):110--121. \bibitem[Quinlan and Hall, 2010]{quinlan_bedtools:_2010} Quinlan, A.~R. and Hall, I.~M. (2010). \newblock {BEDTools}: a flexible suite of utilities for comparing genomic features. \newblock {\em Bioinformatics}, 26(6):841--842. \bibitem[Raney et~al., 2014]{raney_track_2014} Raney, B.~J., Dreszer, T.~R., Barber, G.~P., Clawson, H., Fujita, P.~A., Wang, T., Nguyen, N., Paten, B., Zweig, A.~S., Karolchik, D., and Kent, W.~J. (2014). \newblock Track data hubs enable visualization of user-defined genome-wide annotations on the {UCSC} {Genome} {Browser}. \newblock {\em Bioinformatics}, 30(7):1003--1005. \bibitem[Rico et~al., 2017]{rico_comparative_2017} Rico, D., Martens, J.~H., Downes, K., Carrillo-de Santa-Pau, E., Pancaldi, V., Breschi, A., Richardson, D., Heath, S., Saeed, S., Frontini, M., Chen, L., Watt, S., Müller, F., Clarke, L., Kerstens, H.~H., Wilder, S.~P., Palumbo, E., Djebali, S., Raineri, E., Merkel, A., Esteve-Codina, A., Sultan, M., Bommel, A.~v., Gut, M., Yaspo, M.-L., Rubio, M., Fernandez, J.~M., Attwood, A., Torre, V. d.~l., Royo, R., Fragkogianni, S., Gelpí, J.~L., Torrents, D., Iotchkova, V., Logie, C., Aghajanirefah, A., Singh, A.~A., Janssen-Megens, E.~M., Berentsen, K., Erber, W., Rendon, A., Kostadima, M., Loos, R., Ent, M. A. v.~d., Kaan, A., Sharifi, N., Paul, D.~S., Ifrim, D.~C., Quintin, J., Love, M.~I., Pisano, D.~G., Burden, F., Foad, N., Farrow, S., Zerbino, D.~R., Dunham, I., Kuijpers, T., Lehrach, H., Lengauer, T., Bertone, P., Netea, M.~G., Vingron, M., Beck, S., Flicek, P., Gut, I., Ouwehand, W.~H., Bock, C., Soranzo, N., Guigo, R., Valencia, A., and Stunnenberg, H.~G. (2017). \newblock Comparative analysis of neutrophil and monocyte epigenomes. \newblock {\em bioRxiv}, page 237784. 
\bibitem[{Roadmap Epigenomics Consortium} et~al., 2015]{roadmap_epigenomics_consortium_integrative_2015} {Roadmap Epigenomics Consortium}, Kundaje, A., Meuleman, W., Ernst, J., Bilenky, M., Yen, A., Heravi-Moussavi, A., Kheradpour, P., Zhang, Z., Wang, J., Ziller, M.~J., Amin, V., Whitaker, J.~W., Schultz, M.~D., Ward, L.~D., Sarkar, A., Quon, G., Sandstrom, R.~S., Eaton, M.~L., Wu, Y.-C., Pfenning, A.~R., Wang, X., Claussnitzer, M., {Yaping Liu}, Coarfa, C., Alan~Harris, R., Shoresh, N., Epstein, C.~B., Gjoneska, E., Leung, D., Xie, W., David~Hawkins, R., Lister, R., Hong, C., Gascard, P., Mungall, A.~J., Moore, R., Chuah, E., Tam, A., Canfield, T.~K., Scott~Hansen, R., Kaul, R., Sabo, P.~J., Bansal, M.~S., Carles, A., Dixon, J.~R., Farh, K.-H., Feizi, S., Karlic, R., Kim, A.-R., Kulkarni, A., Li, D., Lowdon, R., Elliott, G., Mercer, T.~R., Neph, S.~J., Onuchic, V., Polak, P., Rajagopal, N., Ray, P., Sallari, R.~C., Siebenthall, K.~T., Sinnott-Armstrong, N.~A., Stevens, M., Thurman, R.~E., Wu, J., Zhang, B., Zhou, X., Beaudet, A.~E., Boyer, L.~A., Jager, P. L.~D., Farnham, P.~J., Fisher, S.~J., Haussler, D., Jones, S. J.~M., Li, W., Marra, M.~A., McManus, M.~T., Sunyaev, S., Thomson, J.~A., Tlsty, T.~D., Tsai, L.-H., Wang, W., Waterland, R.~A., Zhang, M.~Q., Chadwick, L.~H., Bernstein, B.~E., Costello, J.~F., Ecker, J.~R., Hirst, M., Meissner, A., Milosavljevic, A., Ren, B., Stamatoyannopoulos, J.~A., Wang, T., and Kellis, M. (2015). \newblock Integrative analysis of 111 reference human epigenomes. \newblock {\em Nature}, 518(7539):317--330. \bibitem[Rustici et~al., 2013]{rustici_arrayexpress_2013} Rustici, G., Kolesnikov, N., Brandizi, M., Burdett, T., Dylag, M., Emam, I., Farne, A., Hastings, E., Ison, J., Keays, M., Kurbatova, N., Malone, J., Mani, R., Mupo, A., Pedro~Pereira, R., Pilicheva, E., Rung, J., Sharma, A., Tang, Y.~A., Ternent, T., Tikhonov, A., Welter, D., Williams, E., Brazma, A., Parkinson, H., and Sarkans, U. (2013). 
\newblock {ArrayExpress} update—trends in database growth and links to data analysis tools. \newblock {\em Nucleic Acids Research}, 41(D1):D987--D990. +\bibitem[Schones et~al., 2008]{schones_dynamic_2008} +Schones, D.~E., Cui, K., Cuddapah, S., Roh, T.-Y., Barski, A., Wang, Z., Wei, + G., and Zhao, K. (2008). +\newblock Dynamic {Regulation} of {Nucleosome} {Positioning} in the {Human} + {Genome}. +\newblock {\em Cell}, 132(5):887--898. + \bibitem[Schones et~al., 2007]{schones_statistical_2007} Schones, D.~E., Smith, A.~D., and Zhang, M.~Q. (2007). \newblock Statistical significance of cis-regulatory modules. \newblock {\em BMC Bioinformatics}, 8(1):19. \bibitem[Schütz and Delorenzi, 2008]{schutz_mamot:_2008} Schütz, F. and Delorenzi, M. (2008). \newblock {MAMOT}: hidden {Markov} modeling tool. \newblock {\em Bioinformatics}, 24(11):1399--1400. \bibitem[Siepel et~al., 2005]{siepel_evolutionarily_2005} Siepel, A., Bejerano, G., Pedersen, J.~S., Hinrichs, A.~S., Hou, M., Rosenbloom, K., Clawson, H., Spieth, J., Hillier, L.~W., Richards, S., Weinstock, G.~M., Wilson, R.~K., Gibbs, R.~A., Kent, W.~J., Miller, W., and Haussler, D. (2005). \newblock Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. \newblock {\em Genome Research}, 15(8):1034--1050. \bibitem[Soufi et~al., 2015]{soufi_pioneer_2015} Soufi, A., Garcia, M.~F., Jaroszewicz, A., Osman, N., Pellegrini, M., and Zaret, K.~S. (2015). \newblock Pioneer {Transcription} {Factors} {Target} {Partial} {DNA} {Motifs} on {Nucleosomes} to {Initiate} {Reprogramming}. \newblock {\em Cell}, 161(3):555--568. \bibitem[Stedman et~al., 2008]{stedman_cohesins_2008} Stedman, W., Kang, H., Lin, S., Kissil, J.~L., Bartolomei, M.~S., and Lieberman, P.~M. (2008). \newblock Cohesins localize with {CTCF} at the {KSHV} latency control region and at cellular c-myc and {H}19 {Igf}2 insulators. \newblock {\em The EMBO Journal}, 27(4):654--666. \bibitem[Trifonov, 2011]{trifonov_cracking_2011} Trifonov, E.~N. 
(2011). \newblock Cracking the chromatin code: {Precise} rule of nucleosome positioning. \newblock {\em Physics of Life Reviews}, 8(1):39--50. \bibitem[Turatsinze et~al., 2008]{turatsinze_using_2008} Turatsinze, J.-V., Thomas-Chollier, M., Defrance, M., and Helden, J.~v. (2008). \newblock Using {RSAT} to scan genome sequences for transcription factor binding sites and cis -regulatory modules. \newblock {\em Nature Protocols}, 3(10):1578--1588. \bibitem[Vierstra and Stamatoyannopoulos, 2016]{vierstra_genomic_2016} Vierstra, J. and Stamatoyannopoulos, J.~A. (2016). \newblock Genomic footprinting. \newblock {\em Nature Methods}, 13(3):213--221. \bibitem[Voss and Hager, 2014]{voss_dynamic_2014} Voss, T.~C. and Hager, G.~L. (2014). \newblock Dynamic regulation of transcriptional states by chromatin and transcription factors. \newblock {\em Nature Reviews Genetics}, 15(2):69--81. \bibitem[Wang et~al., 2012]{wang_sequence_2012} Wang, J., Zhuang, J., Iyer, S., Lin, X., Whitfield, T.~W., Greven, M.~C., Pierce, B.~G., Dong, X., Kundaje, A., Cheng, Y., Rando, O.~J., Birney, E., Myers, R.~M., Noble, W.~S., Snyder, M., and Weng, Z. (2012). \newblock Sequence features and chromatin structure around the genomic regions bound by 119 human transcription factors. \newblock {\em Genome Research}, 22(9):1798--1812. \bibitem[Weirauch et~al., 2013]{weirauch_evaluation_2013} Weirauch, M.~T., Cote, A., Norel, R., Annala, M., Zhao, Y., Riley, T.~R., Saez-Rodriguez, J., Cokelaer, T., Vedenko, A., Talukder, S., {Dream5 Consortium}, Bussemaker, H.~J., Morris, Q.~D., Bulyk, M.~L., Stolovitzky, G., and Hughes, T.~R. (2013). \newblock Evaluation of methods for modeling transcription factor sequence specificity. \newblock {\em Nature Biotechnology}, 31(2):126--134. +\bibitem[West et~al., 2014]{west_nucleosomal_2014} +West, J.~A., Cook, A., Alver, B.~H., Stadtfeld, M., Deaton, A.~M., + Hochedlinger, K., Park, P.~J., Tolstorukov, M.~Y., and Kingston, R.~E. + (2014). 
+\newblock Nucleosomal occupancy changes locally over key regulatory regions + during cell differentiation and reprogramming. +\newblock {\em Nature Communications}, 5(1):1--12. + \bibitem[Wu et~al., 2016]{wu_biogps:_2016} Wu, C., Jin, X., Tsueng, G., Afrasiabi, C., and Su, A.~I. (2016). \newblock {BioGPS}: building your own mash-up of gene annotations and expression profiles. \newblock {\em Nucleic Acids Research}, 44(D1):D313--D316. \bibitem[Zaret and Carroll, 2011]{zaret_pioneer_2011} Zaret, K.~S. and Carroll, J.~S. (2011). \newblock Pioneer transcription factors: establishing competence for gene expression. \newblock {\em Genes \& Development}, 25(21):2227--2241. \bibitem[Zhang et~al., 2014]{zhang_canonical_2014} Zhang, Y., Vastenhouw, N.~L., Feng, J., Fu, K., Wang, C., Ge, Y., Pauli, A., Hummelen, P.~v., Schier, A.~F., and Liu, X.~S. (2014). \newblock Canonical nucleosome organization at promoters forms during genome activation. \newblock {\em Genome Research}, 24(2):260--266. \bibitem[Zhao et~al., 2005]{zhao_tred:_2005} Zhao, F., Xuan, Z., Liu, L., and Zhang, M.~Q. (2005). \newblock {TRED}: a {Transcriptional} {Regulatory} {Element} {Database} and a platform for in silico gene regulation studies. \newblock {\em Nucleic Acids Research}, 33(suppl\_1):D103--D107. \bibitem[Zhao et~al., 2009]{zhao_inferring_2009} Zhao, Y., Granas, D., and Stormo, G.~D. (2009). \newblock Inferring {Binding} {Energies} from {Selected} {Binding} {Sites}. \newblock {\em PLOS Comput Biol}, 5(12):e1000590. \bibitem[Zhou et~al., 2011]{zhou_charting_2011} Zhou, V.~W., Goren, A., and Bernstein, B.~E. (2011). \newblock Charting histone modifications and the functional organization of mammalian genomes. \newblock {\em Nature Reviews Genetics}, 12(1):7--18. 
\end{thebibliography} diff --git a/my_thesis.blg b/my_thesis.blg index a5307a5..e64b3dd 100644 --- a/my_thesis.blg +++ b/my_thesis.blg @@ -1,60 +1,58 @@ This is BibTeX, Version 0.99d (TeX Live 2017/Debian) Capacity: max_strings=100000, hash_size=100000, hash_prime=85009 The top-level auxiliary file: my_thesis.aux A level-1 auxiliary file: head/dedication.aux A level-1 auxiliary file: head/acknowledgements.aux A level-1 auxiliary file: head/preface.aux A level-1 auxiliary file: head/abstracts.aux A level-1 auxiliary file: main/ch_introduction.aux A level-1 auxiliary file: main/ch_group_projects.aux A level-1 auxiliary file: main/ch_encode_peaks.aux A level-1 auxiliary file: main/ch_smile-seq.aux A level-1 auxiliary file: main/ch_atac-seq.aux A level-1 auxiliary file: tail/appendix.aux A level-1 auxiliary file: tail/biblio.aux The style file: apalike.bst A level-1 auxiliary file: tail/cv.aux Database file #1: tail/bibliography.bib -Warning--I didn't find a database entry for "" -You've used 103 entries, +You've used 108 entries, 1935 wiz_defined-function locations, - 1008 strings with 35079 characters, -and the built_in function-call counts, 63060 in all, are: -= -- 5587 -> -- 4921 -< -- 28 -+ -- 1870 -- -- 1847 -* -- 6585 -:= -- 11282 -add.period$ -- 310 -call.type$ -- 103 -change.case$ -- 1438 -chr.to.int$ -- 102 -cite$ -- 103 -duplicate$ -- 1465 -empty$ -- 3073 -format.name$ -- 1986 -if$ -- 11526 + 1029 strings with 36269 characters, +and the built_in function-call counts, 65583 in all, are: += -- 5823 +> -- 5076 +< -- 29 ++ -- 1926 +- -- 1903 +* -- 6839 +:= -- 11727 +add.period$ -- 325 +call.type$ -- 108 +change.case$ -- 1491 +chr.to.int$ -- 107 +cite$ -- 108 +duplicate$ -- 1535 +empty$ -- 3221 +format.name$ -- 2049 +if$ -- 11997 int.to.chr$ -- 2 int.to.str$ -- 0 -missing$ -- 105 -newline$ -- 518 -num.names$ -- 311 -pop$ -- 1223 +missing$ -- 110 +newline$ -- 543 +num.names$ -- 326 +pop$ -- 1265 preamble$ -- 1 -purify$ -- 1438 +purify$ -- 1491 quote$ -- 0 -skip$ 
-- 1082 +skip$ -- 1133 stack$ -- 0 -substring$ -- 3682 -swap$ -- 126 +substring$ -- 3857 +swap$ -- 131 text.length$ -- 3 text.prefix$ -- 0 top$ -- 0 -type$ -- 614 +type$ -- 644 warning$ -- 0 -while$ -- 386 +while$ -- 405 width$ -- 0 -write$ -- 1343 -(There was 1 warning) +write$ -- 1408 diff --git a/my_thesis.log b/my_thesis.log index baa519c..114cd46 100644 --- a/my_thesis.log +++ b/my_thesis.log @@ -1,2822 +1,2835 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12) 25 NOV 2019 18:23 +This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.12.12) 26 NOV 2019 18:12 entering extended mode restricted \write18 enabled. %&-line parsing enabled. **my_thesis.tex (./my_thesis.tex LaTeX2e <2017-04-15> Babel <3.18> and hyphenation patterns for 84 language(s) loaded. (./head/settings_epfl_template.tex (/usr/share/texlive/texmf-dist/tex/latex/base/book.cls Document Class: book 2014/09/29 v1.4h Standard LaTeX document class (/usr/share/texlive/texmf-dist/tex/latex/base/bk11.clo File: bk11.clo 2014/09/29 v1.4h Standard LaTeX file (size option) ) \c@part=\count79 \c@chapter=\count80 \c@section=\count81 \c@subsection=\count82 \c@subsubsection=\count83 \c@paragraph=\count84 \c@subparagraph=\count85 \c@figure=\count86 \c@table=\count87 \abovecaptionskip=\skip41 \belowcaptionskip=\skip42 \bibindent=\dimen102 ) (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty Package: fontenc 2017/04/05 v2.0i Standard LaTeX package (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file LaTeX Font Info: Redeclaring font encoding T1 on input line 48. 
)) (/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty Package: inputenc 2015/03/17 v1.2c Input encoding file \inpenc@prehook=\toks14 \inpenc@posthook=\toks15 (/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def File: utf8.def 2017/01/28 v1.1t UTF-8 support for inputenc Now handling font encoding OML ... ... no UTF-8 mapping file for font encoding OML Now handling font encoding T1 ... ... processing UTF-8 mapping file for font encoding T1 (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu File: t1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A0 (decimal 160) defining Unicode char U+00A1 (decimal 161) defining Unicode char U+00A3 (decimal 163) defining Unicode char U+00AB (decimal 171) defining Unicode char U+00AD (decimal 173) defining Unicode char U+00BB (decimal 187) defining Unicode char U+00BF (decimal 191) defining Unicode char U+00C0 (decimal 192) defining Unicode char U+00C1 (decimal 193) defining Unicode char U+00C2 (decimal 194) defining Unicode char U+00C3 (decimal 195) defining Unicode char U+00C4 (decimal 196) defining Unicode char U+00C5 (decimal 197) defining Unicode char U+00C6 (decimal 198) defining Unicode char U+00C7 (decimal 199) defining Unicode char U+00C8 (decimal 200) defining Unicode char U+00C9 (decimal 201) defining Unicode char U+00CA (decimal 202) defining Unicode char U+00CB (decimal 203) defining Unicode char U+00CC (decimal 204) defining Unicode char U+00CD (decimal 205) defining Unicode char U+00CE (decimal 206) defining Unicode char U+00CF (decimal 207) defining Unicode char U+00D0 (decimal 208) defining Unicode char U+00D1 (decimal 209) defining Unicode char U+00D2 (decimal 210) defining Unicode char U+00D3 (decimal 211) defining Unicode char U+00D4 (decimal 212) defining Unicode char U+00D5 (decimal 213) defining Unicode char U+00D6 (decimal 214) defining Unicode char U+00D8 (decimal 216) defining Unicode char U+00D9 (decimal 217) defining Unicode char U+00DA (decimal 218) defining 
Unicode char U+00DB (decimal 219) defining Unicode char U+00DC (decimal 220) defining Unicode char U+00DD (decimal 221) defining Unicode char U+00DE (decimal 222) defining Unicode char U+00DF (decimal 223) defining Unicode char U+00E0 (decimal 224) defining Unicode char U+00E1 (decimal 225) defining Unicode char U+00E2 (decimal 226) defining Unicode char U+00E3 (decimal 227) defining Unicode char U+00E4 (decimal 228) defining Unicode char U+00E5 (decimal 229) defining Unicode char U+00E6 (decimal 230) defining Unicode char U+00E7 (decimal 231) defining Unicode char U+00E8 (decimal 232) defining Unicode char U+00E9 (decimal 233) defining Unicode char U+00EA (decimal 234) defining Unicode char U+00EB (decimal 235) defining Unicode char U+00EC (decimal 236) defining Unicode char U+00ED (decimal 237) defining Unicode char U+00EE (decimal 238) defining Unicode char U+00EF (decimal 239) defining Unicode char U+00F0 (decimal 240) defining Unicode char U+00F1 (decimal 241) defining Unicode char U+00F2 (decimal 242) defining Unicode char U+00F3 (decimal 243) defining Unicode char U+00F4 (decimal 244) defining Unicode char U+00F5 (decimal 245) defining Unicode char U+00F6 (decimal 246) defining Unicode char U+00F8 (decimal 248) defining Unicode char U+00F9 (decimal 249) defining Unicode char U+00FA (decimal 250) defining Unicode char U+00FB (decimal 251) defining Unicode char U+00FC (decimal 252) defining Unicode char U+00FD (decimal 253) defining Unicode char U+00FE (decimal 254) defining Unicode char U+00FF (decimal 255) defining Unicode char U+0100 (decimal 256) defining Unicode char U+0101 (decimal 257) defining Unicode char U+0102 (decimal 258) defining Unicode char U+0103 (decimal 259) defining Unicode char U+0104 (decimal 260) defining Unicode char U+0105 (decimal 261) defining Unicode char U+0106 (decimal 262) defining Unicode char U+0107 (decimal 263) defining Unicode char U+0108 (decimal 264) defining Unicode char U+0109 (decimal 265) defining Unicode char U+010A 
(decimal 266) defining Unicode char U+010B (decimal 267) defining Unicode char U+010C (decimal 268) defining Unicode char U+010D (decimal 269) defining Unicode char U+010E (decimal 270) defining Unicode char U+010F (decimal 271) defining Unicode char U+0110 (decimal 272) defining Unicode char U+0111 (decimal 273) defining Unicode char U+0112 (decimal 274) defining Unicode char U+0113 (decimal 275) defining Unicode char U+0114 (decimal 276) defining Unicode char U+0115 (decimal 277) defining Unicode char U+0116 (decimal 278) defining Unicode char U+0117 (decimal 279) defining Unicode char U+0118 (decimal 280) defining Unicode char U+0119 (decimal 281) defining Unicode char U+011A (decimal 282) defining Unicode char U+011B (decimal 283) defining Unicode char U+011C (decimal 284) defining Unicode char U+011D (decimal 285) defining Unicode char U+011E (decimal 286) defining Unicode char U+011F (decimal 287) defining Unicode char U+0120 (decimal 288) defining Unicode char U+0121 (decimal 289) defining Unicode char U+0122 (decimal 290) defining Unicode char U+0123 (decimal 291) defining Unicode char U+0124 (decimal 292) defining Unicode char U+0125 (decimal 293) defining Unicode char U+0128 (decimal 296) defining Unicode char U+0129 (decimal 297) defining Unicode char U+012A (decimal 298) defining Unicode char U+012B (decimal 299) defining Unicode char U+012C (decimal 300) defining Unicode char U+012D (decimal 301) defining Unicode char U+012E (decimal 302) defining Unicode char U+012F (decimal 303) defining Unicode char U+0130 (decimal 304) defining Unicode char U+0131 (decimal 305) defining Unicode char U+0132 (decimal 306) defining Unicode char U+0133 (decimal 307) defining Unicode char U+0134 (decimal 308) defining Unicode char U+0135 (decimal 309) defining Unicode char U+0136 (decimal 310) defining Unicode char U+0137 (decimal 311) defining Unicode char U+0139 (decimal 313) defining Unicode char U+013A (decimal 314) defining Unicode char U+013B (decimal 315) 
defining Unicode char U+013C (decimal 316) defining Unicode char U+013D (decimal 317) defining Unicode char U+013E (decimal 318) defining Unicode char U+0141 (decimal 321) defining Unicode char U+0142 (decimal 322) defining Unicode char U+0143 (decimal 323) defining Unicode char U+0144 (decimal 324) defining Unicode char U+0145 (decimal 325) defining Unicode char U+0146 (decimal 326) defining Unicode char U+0147 (decimal 327) defining Unicode char U+0148 (decimal 328) defining Unicode char U+014A (decimal 330) defining Unicode char U+014B (decimal 331) defining Unicode char U+014C (decimal 332) defining Unicode char U+014D (decimal 333) defining Unicode char U+014E (decimal 334) defining Unicode char U+014F (decimal 335) defining Unicode char U+0150 (decimal 336) defining Unicode char U+0151 (decimal 337) defining Unicode char U+0152 (decimal 338) defining Unicode char U+0153 (decimal 339) defining Unicode char U+0154 (decimal 340) defining Unicode char U+0155 (decimal 341) defining Unicode char U+0156 (decimal 342) defining Unicode char U+0157 (decimal 343) defining Unicode char U+0158 (decimal 344) defining Unicode char U+0159 (decimal 345) defining Unicode char U+015A (decimal 346) defining Unicode char U+015B (decimal 347) defining Unicode char U+015C (decimal 348) defining Unicode char U+015D (decimal 349) defining Unicode char U+015E (decimal 350) defining Unicode char U+015F (decimal 351) defining Unicode char U+0160 (decimal 352) defining Unicode char U+0161 (decimal 353) defining Unicode char U+0162 (decimal 354) defining Unicode char U+0163 (decimal 355) defining Unicode char U+0164 (decimal 356) defining Unicode char U+0165 (decimal 357) defining Unicode char U+0168 (decimal 360) defining Unicode char U+0169 (decimal 361) defining Unicode char U+016A (decimal 362) defining Unicode char U+016B (decimal 363) defining Unicode char U+016C (decimal 364) defining Unicode char U+016D (decimal 365) defining Unicode char U+016E (decimal 366) defining Unicode char 
U+016F (decimal 367) defining Unicode char U+0170 (decimal 368) defining Unicode char U+0171 (decimal 369) defining Unicode char U+0172 (decimal 370) defining Unicode char U+0173 (decimal 371) defining Unicode char U+0174 (decimal 372) defining Unicode char U+0175 (decimal 373) defining Unicode char U+0176 (decimal 374) defining Unicode char U+0177 (decimal 375) defining Unicode char U+0178 (decimal 376) defining Unicode char U+0179 (decimal 377) defining Unicode char U+017A (decimal 378) defining Unicode char U+017B (decimal 379) defining Unicode char U+017C (decimal 380) defining Unicode char U+017D (decimal 381) defining Unicode char U+017E (decimal 382) defining Unicode char U+01CD (decimal 461) defining Unicode char U+01CE (decimal 462) defining Unicode char U+01CF (decimal 463) defining Unicode char U+01D0 (decimal 464) defining Unicode char U+01D1 (decimal 465) defining Unicode char U+01D2 (decimal 466) defining Unicode char U+01D3 (decimal 467) defining Unicode char U+01D4 (decimal 468) defining Unicode char U+01E2 (decimal 482) defining Unicode char U+01E3 (decimal 483) defining Unicode char U+01E6 (decimal 486) defining Unicode char U+01E7 (decimal 487) defining Unicode char U+01E8 (decimal 488) defining Unicode char U+01E9 (decimal 489) defining Unicode char U+01EA (decimal 490) defining Unicode char U+01EB (decimal 491) defining Unicode char U+01F0 (decimal 496) defining Unicode char U+01F4 (decimal 500) defining Unicode char U+01F5 (decimal 501) defining Unicode char U+0218 (decimal 536) defining Unicode char U+0219 (decimal 537) defining Unicode char U+021A (decimal 538) defining Unicode char U+021B (decimal 539) defining Unicode char U+0232 (decimal 562) defining Unicode char U+0233 (decimal 563) defining Unicode char U+1E02 (decimal 7682) defining Unicode char U+1E03 (decimal 7683) defining Unicode char U+200C (decimal 8204) defining Unicode char U+2010 (decimal 8208) defining Unicode char U+2011 (decimal 8209) defining Unicode char U+2012 (decimal 
8210) defining Unicode char U+2013 (decimal 8211) defining Unicode char U+2014 (decimal 8212) defining Unicode char U+2015 (decimal 8213) defining Unicode char U+2018 (decimal 8216) defining Unicode char U+2019 (decimal 8217) defining Unicode char U+201A (decimal 8218) defining Unicode char U+201C (decimal 8220) defining Unicode char U+201D (decimal 8221) defining Unicode char U+201E (decimal 8222) defining Unicode char U+2030 (decimal 8240) defining Unicode char U+2031 (decimal 8241) defining Unicode char U+2039 (decimal 8249) defining Unicode char U+203A (decimal 8250) defining Unicode char U+2423 (decimal 9251) defining Unicode char U+1E20 (decimal 7712) defining Unicode char U+1E21 (decimal 7713) ) Now handling font encoding OT1 ... ... processing UTF-8 mapping file for font encoding OT1 (/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu File: ot1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A0 (decimal 160) defining Unicode char U+00A1 (decimal 161) defining Unicode char U+00A3 (decimal 163) defining Unicode char U+00AD (decimal 173) defining Unicode char U+00B8 (decimal 184) defining Unicode char U+00BF (decimal 191) defining Unicode char U+00C5 (decimal 197) defining Unicode char U+00C6 (decimal 198) defining Unicode char U+00D8 (decimal 216) defining Unicode char U+00DF (decimal 223) defining Unicode char U+00E6 (decimal 230) defining Unicode char U+00EC (decimal 236) defining Unicode char U+00ED (decimal 237) defining Unicode char U+00EE (decimal 238) defining Unicode char U+00EF (decimal 239) defining Unicode char U+00F8 (decimal 248) defining Unicode char U+0131 (decimal 305) defining Unicode char U+0141 (decimal 321) defining Unicode char U+0142 (decimal 322) defining Unicode char U+0152 (decimal 338) defining Unicode char U+0153 (decimal 339) defining Unicode char U+0174 (decimal 372) defining Unicode char U+0175 (decimal 373) defining Unicode char U+0176 (decimal 374) defining Unicode char U+0177 (decimal 375) 
defining Unicode char U+0218 (decimal 536) defining Unicode char U+0219 (decimal 537) defining Unicode char U+021A (decimal 538) defining Unicode char U+021B (decimal 539) defining Unicode char U+2013 (decimal 8211) defining Unicode char U+2014 (decimal 8212) defining Unicode char U+2018 (decimal 8216) defining Unicode char U+2019 (decimal 8217) defining Unicode char U+201C (decimal 8220) defining Unicode char U+201D (decimal 8221) ) Now handling font encoding OMS ... ... processing UTF-8 mapping file for font encoding OMS (/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu File: omsenc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A7 (decimal 167) defining Unicode char U+00B6 (decimal 182) defining Unicode char U+00B7 (decimal 183) defining Unicode char U+2020 (decimal 8224) defining Unicode char U+2021 (decimal 8225) defining Unicode char U+2022 (decimal 8226) ) Now handling font encoding OMX ... ... no UTF-8 mapping file for font encoding OMX Now handling font encoding U ... ... no UTF-8 mapping file for font encoding U defining Unicode char U+00A9 (decimal 169) defining Unicode char U+00AA (decimal 170) defining Unicode char U+00AE (decimal 174) defining Unicode char U+00BA (decimal 186) defining Unicode char U+02C6 (decimal 710) defining Unicode char U+02DC (decimal 732) defining Unicode char U+200C (decimal 8204) defining Unicode char U+2026 (decimal 8230) defining Unicode char U+2122 (decimal 8482) defining Unicode char U+2423 (decimal 9251) )) (/usr/share/texlive/texmf-dist/tex/latex/natbib/natbib.sty Package: natbib 2010/09/13 8.31b (PWD, AO) \bibhang=\skip43 \bibsep=\skip44 LaTeX Info: Redefining \cite on input line 694. 
\c@NAT@ctr=\count88 ) (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.sty Package: babel 2018/02/14 3.18 The Babel package (/usr/share/texlive/texmf-dist/tex/generic/babel/switch.def File: switch.def 2018/02/14 3.18 Babel switching mechanism ) (/usr/share/texlive/texmf-dist/tex/generic/babel-french/french.ldf Language: french 2018/02/04 v3.4b French support from the babel system (/usr/share/texlive/texmf-dist/tex/generic/babel/babel.def File: babel.def 2018/02/14 3.18 Babel common definitions \babel@savecnt=\count89 \U@D=\dimen103 (/usr/share/texlive/texmf-dist/tex/generic/babel/txtbabel.def) \bbl@dirlevel=\count90 ) \l@acadian = a dialect from \language\l@french \FB@nonchar=\count91 Package babel Info: Making : an active character on input line 411. Package babel Info: Making ; an active character on input line 412. Package babel Info: Making ! an active character on input line 413. Package babel Info: Making ? an active character on input line 414. \FBguill@level=\count92 \FB@everypar=\toks16 \FB@Mht=\dimen104 \mc@charclass=\count93 \mc@charfam=\count94 \mc@charslot=\count95 \std@mcc=\count96 \dec@mcc=\count97 \c@FBcaption@count=\count98 \listindentFB=\skip45 \descindentFB=\skip46 \labelwidthFB=\skip47 \leftmarginFB=\skip48 \parindentFFN=\dimen105 \FBfnindent=\skip49 ) (/usr/share/texlive/texmf-dist/tex/generic/babel-german/german.ldf Language: german 2016/11/02 v2.9 German support for babel (traditional orthogra phy) (/usr/share/texlive/texmf-dist/tex/generic/babel-german/germanb.ldf Language: germanb 2016/11/02 v2.9 German support for babel (traditional orthogr aphy) Package babel Info: Making " an active character on input line 139. 
)) (/usr/share/texlive/texmf-dist/tex/generic/babel-english/english.ldf Language: english 2017/06/06 v3.3r English support from the babel system \l@canadian = a dialect from \language\l@american \l@australian = a dialect from \language\l@british \l@newzealand = a dialect from \language\l@british )) (/usr/share/texlive/texmf-dist/tex/latex/carlisle/scalefnt.sty) (/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty Package: keyval 2014/10/28 v1.15 key=value parser (DPC) \KV@toks@=\toks17 ) (/usr/share/texmf/tex/latex/lm/lmodern.sty Package: lmodern 2009/10/30 v1.6 Latin Modern Fonts LaTeX Font Info: Overwriting symbol font `operators' in version `normal' (Font) OT1/cmr/m/n --> OT1/lmr/m/n on input line 22. LaTeX Font Info: Overwriting symbol font `letters' in version `normal' (Font) OML/cmm/m/it --> OML/lmm/m/it on input line 23. LaTeX Font Info: Overwriting symbol font `symbols' in version `normal' (Font) OMS/cmsy/m/n --> OMS/lmsy/m/n on input line 24. LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal' (Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 25. LaTeX Font Info: Overwriting symbol font `operators' in version `bold' (Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 26. LaTeX Font Info: Overwriting symbol font `letters' in version `bold' (Font) OML/cmm/b/it --> OML/lmm/b/it on input line 27. LaTeX Font Info: Overwriting symbol font `symbols' in version `bold' (Font) OMS/cmsy/b/n --> OMS/lmsy/b/n on input line 28. LaTeX Font Info: Overwriting symbol font `largesymbols' in version `bold' (Font) OMX/cmex/m/n --> OMX/lmex/m/n on input line 29. LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal' (Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 31. LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal' (Font) OT1/cmss/m/n --> OT1/lmss/m/n on input line 32. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal' (Font) OT1/cmr/m/it --> OT1/lmr/m/it on input line 33. 
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal' (Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 34. LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `bold' (Font) OT1/cmr/bx/n --> OT1/lmr/bx/n on input line 35. LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold' (Font) OT1/cmss/bx/n --> OT1/lmss/bx/n on input line 36. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' (Font) OT1/cmr/bx/it --> OT1/lmr/bx/it on input line 37. LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold' (Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 38. ) (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier.sty Package: fourier 2005/01/01 1.4 fourier-GUTenberg package Now handling font encoding FML ... ... no UTF-8 mapping file for font encoding FML Now handling font encoding FMS ... ... no UTF-8 mapping file for font encoding FMS Now handling font encoding FMX ... ... no UTF-8 mapping file for font encoding FMX (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty Package: fontenc 2017/04/05 v2.0i Standard LaTeX package (/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def File: t1enc.def 2017/04/05 v2.0i Standard LaTeX file LaTeX Font Info: Redeclaring font encoding T1 on input line 48. )) (/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty Package: textcomp 2017/04/05 v2.0i Standard LaTeX package Package textcomp Info: Sub-encoding information: (textcomp) 5 = only ISO-Adobe without \textcurrency (textcomp) 4 = 5 + \texteuro (textcomp) 3 = 4 + \textohm (textcomp) 2 = 3 + \textestimated + \textcurrency (textcomp) 1 = TS1 - \textcircled - \t (textcomp) 0 = TS1 (full) (textcomp) Font families with sub-encoding setting implement (textcomp) only a restricted character set as indicated. (textcomp) Family '?' is the default used for unknown fonts. (textcomp) See the documentation for details. Package textcomp Info: Setting ? sub-encoding to TS1/1 on input line 79. 
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file Now handling font encoding TS1 ... ... processing UTF-8 mapping file for font encoding TS1 (/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu File: ts1enc.dfu 2017/01/28 v1.1t UTF-8 support for inputenc defining Unicode char U+00A2 (decimal 162) defining Unicode char U+00A3 (decimal 163) defining Unicode char U+00A4 (decimal 164) defining Unicode char U+00A5 (decimal 165) defining Unicode char U+00A6 (decimal 166) defining Unicode char U+00A7 (decimal 167) defining Unicode char U+00A8 (decimal 168) defining Unicode char U+00A9 (decimal 169) defining Unicode char U+00AA (decimal 170) defining Unicode char U+00AC (decimal 172) defining Unicode char U+00AE (decimal 174) defining Unicode char U+00AF (decimal 175) defining Unicode char U+00B0 (decimal 176) defining Unicode char U+00B1 (decimal 177) defining Unicode char U+00B2 (decimal 178) defining Unicode char U+00B3 (decimal 179) defining Unicode char U+00B4 (decimal 180) defining Unicode char U+00B5 (decimal 181) defining Unicode char U+00B6 (decimal 182) defining Unicode char U+00B7 (decimal 183) defining Unicode char U+00B9 (decimal 185) defining Unicode char U+00BA (decimal 186) defining Unicode char U+00BC (decimal 188) defining Unicode char U+00BD (decimal 189) defining Unicode char U+00BE (decimal 190) defining Unicode char U+00D7 (decimal 215) defining Unicode char U+00F7 (decimal 247) defining Unicode char U+0192 (decimal 402) defining Unicode char U+02C7 (decimal 711) defining Unicode char U+02D8 (decimal 728) defining Unicode char U+02DD (decimal 733) defining Unicode char U+0E3F (decimal 3647) defining Unicode char U+2016 (decimal 8214) defining Unicode char U+2020 (decimal 8224) defining Unicode char U+2021 (decimal 8225) defining Unicode char U+2022 (decimal 8226) defining Unicode char U+2030 (decimal 8240) defining Unicode char U+2031 (decimal 8241) defining Unicode char 
U+203B (decimal 8251) defining Unicode char U+203D (decimal 8253) defining Unicode char U+2044 (decimal 8260) defining Unicode char U+204E (decimal 8270) defining Unicode char U+2052 (decimal 8274) defining Unicode char U+20A1 (decimal 8353) defining Unicode char U+20A4 (decimal 8356) defining Unicode char U+20A6 (decimal 8358) defining Unicode char U+20A9 (decimal 8361) defining Unicode char U+20AB (decimal 8363) defining Unicode char U+20AC (decimal 8364) defining Unicode char U+20B1 (decimal 8369) defining Unicode char U+2103 (decimal 8451) defining Unicode char U+2116 (decimal 8470) defining Unicode char U+2117 (decimal 8471) defining Unicode char U+211E (decimal 8478) defining Unicode char U+2120 (decimal 8480) defining Unicode char U+2122 (decimal 8482) defining Unicode char U+2126 (decimal 8486) defining Unicode char U+2127 (decimal 8487) defining Unicode char U+212E (decimal 8494) defining Unicode char U+2190 (decimal 8592) defining Unicode char U+2191 (decimal 8593) defining Unicode char U+2192 (decimal 8594) defining Unicode char U+2193 (decimal 8595) defining Unicode char U+2329 (decimal 9001) defining Unicode char U+232A (decimal 9002) defining Unicode char U+2422 (decimal 9250) defining Unicode char U+25E6 (decimal 9702) defining Unicode char U+25EF (decimal 9711) defining Unicode char U+266A (decimal 9834) )) LaTeX Info: Redefining \oldstylenums on input line 334. Package textcomp Info: Setting cmr sub-encoding to TS1/0 on input line 349. Package textcomp Info: Setting cmss sub-encoding to TS1/0 on input line 350. Package textcomp Info: Setting cmtt sub-encoding to TS1/0 on input line 351. Package textcomp Info: Setting cmvtt sub-encoding to TS1/0 on input line 352. Package textcomp Info: Setting cmbr sub-encoding to TS1/0 on input line 353. Package textcomp Info: Setting cmtl sub-encoding to TS1/0 on input line 354. Package textcomp Info: Setting ccr sub-encoding to TS1/0 on input line 355. 
Package textcomp Info: Setting ptm sub-encoding to TS1/4 on input line 356. Package textcomp Info: Setting pcr sub-encoding to TS1/4 on input line 357. Package textcomp Info: Setting phv sub-encoding to TS1/4 on input line 358. Package textcomp Info: Setting ppl sub-encoding to TS1/3 on input line 359. Package textcomp Info: Setting pag sub-encoding to TS1/4 on input line 360. Package textcomp Info: Setting pbk sub-encoding to TS1/4 on input line 361. Package textcomp Info: Setting pnc sub-encoding to TS1/4 on input line 362. Package textcomp Info: Setting pzc sub-encoding to TS1/4 on input line 363. Package textcomp Info: Setting bch sub-encoding to TS1/4 on input line 364. Package textcomp Info: Setting put sub-encoding to TS1/5 on input line 365. Package textcomp Info: Setting uag sub-encoding to TS1/5 on input line 366. Package textcomp Info: Setting ugq sub-encoding to TS1/5 on input line 367. Package textcomp Info: Setting ul8 sub-encoding to TS1/4 on input line 368. Package textcomp Info: Setting ul9 sub-encoding to TS1/4 on input line 369. Package textcomp Info: Setting augie sub-encoding to TS1/5 on input line 370. Package textcomp Info: Setting dayrom sub-encoding to TS1/3 on input line 371. Package textcomp Info: Setting dayroms sub-encoding to TS1/3 on input line 372. Package textcomp Info: Setting pxr sub-encoding to TS1/0 on input line 373. Package textcomp Info: Setting pxss sub-encoding to TS1/0 on input line 374. Package textcomp Info: Setting pxtt sub-encoding to TS1/0 on input line 375. Package textcomp Info: Setting txr sub-encoding to TS1/0 on input line 376. Package textcomp Info: Setting txss sub-encoding to TS1/0 on input line 377. Package textcomp Info: Setting txtt sub-encoding to TS1/0 on input line 378. Package textcomp Info: Setting lmr sub-encoding to TS1/0 on input line 379. Package textcomp Info: Setting lmdh sub-encoding to TS1/0 on input line 380. Package textcomp Info: Setting lmss sub-encoding to TS1/0 on input line 381. 
Package textcomp Info: Setting lmssq sub-encoding to TS1/0 on input line 382. Package textcomp Info: Setting lmvtt sub-encoding to TS1/0 on input line 383. Package textcomp Info: Setting lmtt sub-encoding to TS1/0 on input line 384. Package textcomp Info: Setting qhv sub-encoding to TS1/0 on input line 385. Package textcomp Info: Setting qag sub-encoding to TS1/0 on input line 386. Package textcomp Info: Setting qbk sub-encoding to TS1/0 on input line 387. Package textcomp Info: Setting qcr sub-encoding to TS1/0 on input line 388. Package textcomp Info: Setting qcs sub-encoding to TS1/0 on input line 389. Package textcomp Info: Setting qpl sub-encoding to TS1/0 on input line 390. Package textcomp Info: Setting qtm sub-encoding to TS1/0 on input line 391. Package textcomp Info: Setting qzc sub-encoding to TS1/0 on input line 392. Package textcomp Info: Setting qhvc sub-encoding to TS1/0 on input line 393. Package textcomp Info: Setting futs sub-encoding to TS1/4 on input line 394. Package textcomp Info: Setting futx sub-encoding to TS1/4 on input line 395. Package textcomp Info: Setting futj sub-encoding to TS1/4 on input line 396. Package textcomp Info: Setting hlh sub-encoding to TS1/3 on input line 397. Package textcomp Info: Setting hls sub-encoding to TS1/3 on input line 398. Package textcomp Info: Setting hlst sub-encoding to TS1/3 on input line 399. Package textcomp Info: Setting hlct sub-encoding to TS1/5 on input line 400. Package textcomp Info: Setting hlx sub-encoding to TS1/5 on input line 401. Package textcomp Info: Setting hlce sub-encoding to TS1/5 on input line 402. Package textcomp Info: Setting hlcn sub-encoding to TS1/5 on input line 403. Package textcomp Info: Setting hlcw sub-encoding to TS1/5 on input line 404. Package textcomp Info: Setting hlcf sub-encoding to TS1/5 on input line 405. Package textcomp Info: Setting pplx sub-encoding to TS1/3 on input line 406. Package textcomp Info: Setting pplj sub-encoding to TS1/3 on input line 407. 
Package textcomp Info: Setting ptmx sub-encoding to TS1/4 on input line 408. Package textcomp Info: Setting ptmj sub-encoding to TS1/4 on input line 409. ) (/usr/share/texlive/texmf-dist/tex/latex/fourier/fourier-orns.sty Package: fourier-orns 2004/01/30 1.1 fourier-ornaments package ) LaTeX Font Info: Redeclaring symbol font `operators' on input line 50. LaTeX Font Info: Encoding `OT1' has changed to `T1' for symbol font (Font) `operators' in the math version `normal' on input line 50. LaTeX Font Info: Overwriting symbol font `operators' in version `normal' (Font) OT1/lmr/m/n --> T1/futs/m/n on input line 50. LaTeX Font Info: Encoding `OT1' has changed to `T1' for symbol font (Font) `operators' in the math version `bold' on input line 50. LaTeX Font Info: Overwriting symbol font `operators' in version `bold' (Font) OT1/lmr/bx/n --> T1/futs/m/n on input line 50. LaTeX Font Info: Overwriting symbol font `operators' in version `bold' (Font) T1/futs/m/n --> T1/futs/b/n on input line 51. LaTeX Font Info: Redeclaring symbol font `letters' on input line 59. LaTeX Font Info: Encoding `OML' has changed to `FML' for symbol font (Font) `letters' in the math version `normal' on input line 59. LaTeX Font Info: Overwriting symbol font `letters' in version `normal' (Font) OML/lmm/m/it --> FML/futmi/m/it on input line 59. LaTeX Font Info: Encoding `OML' has changed to `FML' for symbol font (Font) `letters' in the math version `bold' on input line 59. LaTeX Font Info: Overwriting symbol font `letters' in version `bold' (Font) OML/lmm/b/it --> FML/futmi/m/it on input line 59. \symotherletters=\mathgroup4 LaTeX Font Info: Overwriting symbol font `letters' in version `bold' (Font) FML/futmi/m/it --> FML/futmi/b/it on input line 61. LaTeX Font Info: Overwriting symbol font `otherletters' in version `bold' (Font) FML/futm/m/it --> FML/futm/b/it on input line 62. LaTeX Font Info: Redeclaring math symbol \Gamma on input line 63. 
LaTeX Font Info: Redeclaring math symbol \Delta on input line 64. LaTeX Font Info: Redeclaring math symbol \Theta on input line 65. LaTeX Font Info: Redeclaring math symbol \Lambda on input line 66. LaTeX Font Info: Redeclaring math symbol \Xi on input line 67. LaTeX Font Info: Redeclaring math symbol \Pi on input line 68. LaTeX Font Info: Redeclaring math symbol \Sigma on input line 69. LaTeX Font Info: Redeclaring math symbol \Upsilon on input line 70. LaTeX Font Info: Redeclaring math symbol \Phi on input line 71. LaTeX Font Info: Redeclaring math symbol \Psi on input line 72. LaTeX Font Info: Redeclaring math symbol \Omega on input line 73. LaTeX Font Info: Redeclaring symbol font `symbols' on input line 113. LaTeX Font Info: Encoding `OMS' has changed to `FMS' for symbol font (Font) `symbols' in the math version `normal' on input line 113. LaTeX Font Info: Overwriting symbol font `symbols' in version `normal' (Font) OMS/lmsy/m/n --> FMS/futm/m/n on input line 113. LaTeX Font Info: Encoding `OMS' has changed to `FMS' for symbol font (Font) `symbols' in the math version `bold' on input line 113. LaTeX Font Info: Overwriting symbol font `symbols' in version `bold' (Font) OMS/lmsy/b/n --> FMS/futm/m/n on input line 113. LaTeX Font Info: Redeclaring symbol font `largesymbols' on input line 114. LaTeX Font Info: Encoding `OMX' has changed to `FMX' for symbol font (Font) `largesymbols' in the math version `normal' on input line 1 14. LaTeX Font Info: Overwriting symbol font `largesymbols' in version `normal' (Font) OMX/lmex/m/n --> FMX/futm/m/n on input line 114. LaTeX Font Info: Encoding `OMX' has changed to `FMX' for symbol font (Font) `largesymbols' in the math version `bold' on input line 114 . LaTeX Font Info: Overwriting symbol font `largesymbols' in version `bold' (Font) OMX/lmex/m/n --> FMX/futm/m/n on input line 114. LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 115. 
LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal' (Font) OT1/lmr/bx/n --> T1/futs/bx/n on input line 115. LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `bold' (Font) OT1/lmr/bx/n --> T1/futs/bx/n on input line 115. LaTeX Font Info: Redeclaring math alphabet \mathrm on input line 116. LaTeX Font Info: Redeclaring math alphabet \mathit on input line 117. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal' (Font) OT1/lmr/m/it --> T1/futs/m/it on input line 117. LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' (Font) OT1/lmr/bx/it --> T1/futs/m/it on input line 117. LaTeX Font Info: Redeclaring math alphabet \mathcal on input line 118. LaTeX Font Info: Redeclaring math symbol \parallel on input line 134. LaTeX Font Info: Redeclaring math symbol \hbar on input line 148. LaTeX Font Info: Redeclaring math symbol \varkappa on input line 186. LaTeX Font Info: Redeclaring math symbol \varvarrho on input line 187. LaTeX Font Info: Redeclaring math delimiter \Vert on input line 210. LaTeX Font Info: Redeclaring math delimiter \vert on input line 215. LaTeX Font Info: Redeclaring math delimiter \Downarrow on input line 225. LaTeX Font Info: Redeclaring math delimiter \backslash on input line 227. LaTeX Font Info: Redeclaring math delimiter \rangle on input line 229. LaTeX Font Info: Redeclaring math delimiter \langle on input line 231. LaTeX Font Info: Redeclaring math delimiter \rbrace on input line 233. LaTeX Font Info: Redeclaring math delimiter \lbrace on input line 235. LaTeX Font Info: Redeclaring math delimiter \rceil on input line 237. LaTeX Font Info: Redeclaring math delimiter \lceil on input line 239. LaTeX Font Info: Redeclaring math delimiter \rfloor on input line 241. LaTeX Font Info: Redeclaring math delimiter \lfloor on input line 243. LaTeX Font Info: Redeclaring math accent \acute on input line 247. LaTeX Font Info: Redeclaring math accent \grave on input line 248. 
LaTeX Font Info: Redeclaring math accent \ddot on input line 249. LaTeX Font Info: Redeclaring math accent \tilde on input line 250. LaTeX Font Info: Redeclaring math accent \bar on input line 251. LaTeX Font Info: Redeclaring math accent \breve on input line 252. LaTeX Font Info: Redeclaring math accent \check on input line 253. LaTeX Font Info: Redeclaring math accent \hat on input line 254. LaTeX Font Info: Redeclaring math accent \dot on input line 255. LaTeX Font Info: Redeclaring math accent \mathring on input line 256. \symUfutm=\mathgroup5 ) (/usr/share/texlive/texmf-dist/tex/latex/setspace/setspace.sty Package: setspace 2011/12/19 v6.7a set line spacing ) (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphicx.sty Package: graphicx 2017/06/01 v1.1a Enhanced LaTeX Graphics (DPC,SPQR) (/usr/share/texlive/texmf-dist/tex/latex/graphics/graphics.sty Package: graphics 2017/06/25 v1.2c Standard LaTeX Graphics (DPC,SPQR) (/usr/share/texlive/texmf-dist/tex/latex/graphics/trig.sty Package: trig 2016/01/03 v1.10 sin cos tan (DPC) ) (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/graphics.cfg File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration ) Package graphics Info: Driver file: pdftex.def on input line 99. (/usr/share/texlive/texmf-dist/tex/latex/graphics-def/pdftex.def File: pdftex.def 2018/01/08 v1.0l Graphics/color driver for pdftex )) \Gin@req@height=\dimen106 \Gin@req@width=\dimen107 ) (/usr/share/texlive/texmf-dist/tex/latex/xcolor/xcolor.sty Package: xcolor 2016/05/11 v2.12 LaTeX color extensions (UK) (/usr/share/texlive/texmf-dist/tex/latex/graphics-cfg/color.cfg File: color.cfg 2016/01/02 v1.6 sample color configuration ) Package xcolor Info: Driver file: pdftex.def on input line 225. Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1348. Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1352. Package xcolor Info: Model `RGB' extended on input line 1364. 
Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1366. Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1367. Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1368. Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1369. Package xcolor Info: Model `Gray' substituted by `gray' on input line 1370. Package xcolor Info: Model `wave' substituted by `hsb' on input line 1371. ) (/usr/share/texlive/texmf-dist/tex/latex/subfig/subfig.sty Package: subfig 2005/06/28 ver: 1.3 subfig package (/usr/share/texlive/texmf-dist/tex/latex/caption/caption.sty Package: caption 2016/02/21 v3.3-144 Customizing captions (AR) (/usr/share/texlive/texmf-dist/tex/latex/caption/caption3.sty Package: caption3 2016/05/22 v1.7-166 caption3 kernel (AR) Package caption3 Info: TeX engine: e-TeX on input line 67. \captionmargin=\dimen108 \captionmargin@=\dimen109 \captionwidth=\dimen110 \caption@tempdima=\dimen111 \caption@indent=\dimen112 \caption@parindent=\dimen113 \caption@hangindent=\dimen114 ) \c@ContinuedFloat=\count99 ) \c@KVtest=\count100 \sf@farskip=\skip50 \sf@captopadj=\dimen115 \sf@capskip=\skip51 \sf@nearskip=\skip52 \c@subfigure=\count101 \c@subfigure@save=\count102 \c@lofdepth=\count103 \c@subtable=\count104 \c@subtable@save=\count105 \c@lotdepth=\count106 \sf@top=\skip53 \sf@bottom=\skip54 ) (/usr/share/texlive/texmf-dist/tex/latex/booktabs/booktabs.sty Package: booktabs 2016/04/27 v1.618033 publication quality tables \heavyrulewidth=\dimen116 \lightrulewidth=\dimen117 \cmidrulewidth=\dimen118 \belowrulesep=\dimen119 \belowbottomsep=\dimen120 \aboverulesep=\dimen121 \abovetopsep=\dimen122 \cmidrulesep=\dimen123 \cmidrulekern=\dimen124 \defaultaddspace=\dimen125 \@cmidla=\count107 \@cmidlb=\count108 \@aboverulesep=\dimen126 \@belowrulesep=\dimen127 \@thisruleclass=\count109 \@lastruleclass=\count110 \@thisrulewidth=\dimen128 ) (/usr/share/texlive/texmf-dist/tex/latex/lipsum/lipsum.sty Package: lipsum 
2014/07/27 v1.3 150 paragraphs of Lorem Ipsum dummy text \c@lips@count=\count111 ) (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.sty Package: microtype 2018/01/14 v2.7a Micro-typographical refinements (RS) \MT@toks=\toks18 \MT@count=\count112 LaTeX Info: Redefining \textls on input line 793. \MT@outer@kern=\dimen129 LaTeX Info: Redefining \textmicrotypecontext on input line 1339. \MT@listname@count=\count113 (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype-pdftex.def File: microtype-pdftex.def 2018/01/14 v2.7a Definitions specific to pdftex (RS) LaTeX Info: Redefining \lsstyle on input line 913. LaTeX Info: Redefining \lslig on input line 913. \MT@outer@space=\skip55 ) Package microtype Info: Loading configuration file microtype.cfg. (/usr/share/texlive/texmf-dist/tex/latex/microtype/microtype.cfg File: microtype.cfg 2018/01/14 v2.7a microtype main configuration file (RS) )) (/usr/share/texlive/texmf-dist/tex/latex/url/url.sty \Urlmuskip=\muskip10 Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. 
) (/usr/share/texlive/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty Package: fancyhdr 2017/06/30 v3.9a Extensive control of page headers and footer s \f@nch@headwidth=\skip56 \f@nch@O@elh=\skip57 \f@nch@O@erh=\skip58 \f@nch@O@olh=\skip59 \f@nch@O@orh=\skip60 \f@nch@O@elf=\skip61 \f@nch@O@erf=\skip62 \f@nch@O@olf=\skip63 \f@nch@O@orf=\skip64 ) (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.sty \lst@mode=\count114 \lst@gtempboxa=\box26 \lst@token=\toks19 \lst@length=\count115 \lst@currlwidth=\dimen130 \lst@column=\count116 \lst@pos=\count117 \lst@lostspace=\dimen131 \lst@width=\dimen132 \lst@newlines=\count118 \lst@lineno=\count119 \lst@maxwidth=\dimen133 (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz) \c@lstnumber=\count120 \lst@skipnumbers=\count121 \lst@framebox=\box27 ) (/usr/share/texlive/texmf-dist/tex/latex/listings/listings.cfg File: listings.cfg 2015/06/04 1.6 listings configuration )) Package: listings 2015/06/04 1.6 (Carsten Heinz) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty File: lstlang1.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty File: lstlang2.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty File: lstlang3.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty File: lstlang1.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty File: lstlang2.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty File: lstlang3.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty File: lstlang1.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang2.sty File: lstlang2.sty 2015/06/04 1.6 listings language 
file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang3.sty File: lstlang3.sty 2015/06/04 1.6 listings language file ) (/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz) ) (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hyperref.sty Package: hyperref 2018/02/06 v6.86b Hypertext links for LaTeX (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-hyperref.sty Package: hobsub-hyperref 2016/05/16 v1.14 Bundle oberdiek, subset hyperref (HO) (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/hobsub-generic.sty Package: hobsub-generic 2016/05/16 v1.14 Bundle oberdiek, subset generic (HO) Package: hobsub 2016/05/16 v1.14 Construct package bundles (HO) Package: infwarerr 2016/05/16 v1.4 Providing info/warning/error messages (HO) Package: ltxcmds 2016/05/16 v1.23 LaTeX kernel commands for general use (HO) Package: ifluatex 2016/05/16 v1.4 Provides the ifluatex switch (HO) Package ifluatex Info: LuaTeX not detected. Package: ifvtex 2016/05/16 v1.6 Detect VTeX and its facilities (HO) Package ifvtex Info: VTeX not detected. Package: intcalc 2016/05/16 v1.2 Expandable calculations with integers (HO) Package: ifpdf 2017/03/15 v3.2 Provides the ifpdf switch Package: etexcmds 2016/05/16 v1.6 Avoid name clashes with e-TeX commands (HO) Package etexcmds Info: Could not find \expanded. (etexcmds) That can mean that you are not using pdfTeX 1.50 or (etexcmds) that some package has redefined \expanded. (etexcmds) In the latter case, load this package earlier. Package: kvsetkeys 2016/05/16 v1.17 Key value parser (HO) Package: kvdefinekeys 2016/05/16 v1.4 Define keys (HO) Package: pdftexcmds 2018/01/21 v0.26 Utility functions of pdfTeX for LuaTeX (HO ) Package pdftexcmds Info: LuaTeX not detected. Package pdftexcmds Info: \pdf@primitive is available. Package pdftexcmds Info: \pdf@ifprimitive is available. Package pdftexcmds Info: \pdfdraftmode found. 
Package: pdfescape 2016/05/16 v1.14 Implements pdfTeX's escape features (HO) Package: bigintcalc 2016/05/16 v1.4 Expandable calculations on big integers (HO ) Package: bitset 2016/05/16 v1.2 Handle bit-vector datatype (HO) Package: uniquecounter 2016/05/16 v1.3 Provide unlimited unique counter (HO) ) Package hobsub Info: Skipping package `hobsub' (already loaded). Package: letltxmacro 2016/05/16 v1.5 Let assignment for LaTeX macros (HO) Package: hopatch 2016/05/16 v1.3 Wrapper for package hooks (HO) Package: xcolor-patch 2016/05/16 xcolor patch Package: atveryend 2016/05/16 v1.9 Hooks at the very end of document (HO) Package atveryend Info: \enddocument detected (standard20110627). Package: atbegshi 2016/06/09 v1.18 At begin shipout hook (HO) Package: refcount 2016/05/16 v3.5 Data extraction from label references (HO) Package: hycolor 2016/05/16 v1.8 Color options for hyperref/bookmark (HO) ) (/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/auxhook.sty Package: auxhook 2016/05/16 v1.4 Hooks for auxiliary files (HO) ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/kvoptions.sty Package: kvoptions 2016/05/16 v3.12 Key value format for package options (HO) ) \@linkdim=\dimen134 \Hy@linkcounter=\count122 \Hy@pagecounter=\count123 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/pd1enc.def File: pd1enc.def 2018/02/06 v6.86b Hyperref: PDFDocEncoding definition (HO) Now handling font encoding PD1 ... ... no UTF-8 mapping file for font encoding PD1 ) \Hy@SavedSpaceFactor=\count124 (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/hyperref.cfg File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive ) Package hyperref Info: Hyper figures OFF on input line 4509. Package hyperref Info: Link nesting OFF on input line 4514. Package hyperref Info: Hyper index ON on input line 4517. 
Package hyperref Info: Plain pages OFF on input line 4524. Package hyperref Info: Backreferencing OFF on input line 4529. Package hyperref Info: Implicit mode ON; LaTeX internals redefined. Package hyperref Info: Bookmarks ON on input line 4762. \c@Hy@tempcnt=\count125 LaTeX Info: Redefining \url on input line 5115. \XeTeXLinkMargin=\dimen135 \Fld@menulength=\count126 \Field@Width=\dimen136 \Fld@charsize=\dimen137 Package hyperref Info: Hyper figures OFF on input line 6369. Package hyperref Info: Link nesting OFF on input line 6374. Package hyperref Info: Hyper index ON on input line 6377. Package hyperref Info: backreferencing OFF on input line 6384. Package hyperref Info: Link coloring OFF on input line 6389. Package hyperref Info: Link coloring with OCG OFF on input line 6394. Package hyperref Info: PDF/A mode OFF on input line 6399. LaTeX Info: Redefining \ref on input line 6439. LaTeX Info: Redefining \pageref on input line 6443. \Hy@abspage=\count127 \c@Item=\count128 \c@Hfootnote=\count129 ) Package hyperref Info: Driver (autodetected): hpdftex. (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hpdftex.def File: hpdftex.def 2018/02/06 v6.86b Hyperref driver for pdfTeX \Fld@listcount=\count130 \c@bookmark@seq@number=\count131 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty Package: rerunfilecheck 2016/05/16 v1.8 Rerun checks for auxiliary files (HO) Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2 82. ) \Hy@SectionHShift=\skip65 ) Package hyperref Info: Option `colorlinks' set `true' on input line 105. 
(/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pdfpages.sty Package: pdfpages 2017/10/31 v0.5l Insert pages of external PDF documents (AM) (/usr/share/texlive/texmf-dist/tex/latex/base/ifthen.sty Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC) ) (/usr/share/texlive/texmf-dist/tex/latex/tools/calc.sty Package: calc 2014/10/28 v4.3 Infix arithmetic (KKT,FJ) \calc@Acount=\count132 \calc@Bcount=\count133 \calc@Adimen=\dimen138 \calc@Bdimen=\dimen139 \calc@Askip=\skip66 \calc@Bskip=\skip67 LaTeX Info: Redefining \setlength on input line 80. LaTeX Info: Redefining \addtolength on input line 81. \calc@Ccount=\count134 \calc@Cskip=\skip68 ) (/usr/share/texlive/texmf-dist/tex/latex/eso-pic/eso-pic.sty Package: eso-pic 2015/07/21 v2.0g eso-pic (RN) ) \AM@pagewidth=\dimen140 \AM@pageheight=\dimen141 (/usr/share/texlive/texmf-dist/tex/latex/pdfpages/pppdftex.def File: pppdftex.def 2017/10/31 v0.5l Pdfpages driver for pdfTeX (AM) ) \AM@pagebox=\box28 \AM@global@opts=\toks20 \AM@toc@title=\toks21 \c@AM@survey=\count135 \AM@templatesizebox=\box29 ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bookmark.sty Package: bookmark 2016/05/17 v1.26 PDF bookmarks (HO) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/bkm-pdftex.def File: bkm-pdftex.def 2016/05/17 v1.26 bookmark driver for pdfTeX (HO) \BKM@id=\count136 )) (/usr/share/texlive/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex \pgfutil@everybye=\toks22 \pgfutil@tempdima=\dimen142 \pgfutil@tempdimb=\dimen143 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common-lists.t ex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def \pgfutil@abb=\box30 (/usr/share/texlive/texmf-dist/tex/latex/ms/everyshi.sty Package: everyshi 2001/05/15 v3.00 EveryShipout Package 
(MS) )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex Package: pgfrcs 2015/08/07 v3.0.1a (rcs-revision 1.31) )) Package: pgf 2015/08/07 v3.0.1a (rcs-revision 1.15) (/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex Package: pgfsys 2014/07/09 v3.0.1a (rcs-revision 1.48) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex \pgfkeys@pathtoks=\toks23 \pgfkeys@temptoks=\toks24 (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeysfiltered.code.t ex \pgfkeys@tmptoks=\toks25 )) \pgf@x=\dimen144 \pgf@y=\dimen145 \pgf@xa=\dimen146 \pgf@ya=\dimen147 \pgf@xb=\dimen148 \pgf@yb=\dimen149 \pgf@xc=\dimen150 \pgf@yc=\dimen151 \w@pgf@writea=\write3 \r@pgf@reada=\read1 \c@pgf@counta=\count137 \c@pgf@countb=\count138 \c@pgf@countc=\count139 \c@pgf@countd=\count140 \t@pgf@toka=\toks26 \t@pgf@tokb=\toks27 \t@pgf@tokc=\toks28 (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg File: pgf.cfg 2008/05/14 (rcs-revision 1.7) ) Driver file for pgf: pgfsys-pdftex.def (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def File: pgfsys-pdftex.def 2014/10/11 (rcs-revision 1.35) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.de f File: pgfsys-common-pdf.def 2013/10/10 (rcs-revision 1.13) ))) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code. tex File: pgfsyssoftpath.code.tex 2013/09/09 (rcs-revision 1.9) \pgfsyssoftpath@smallbuffer@items=\count141 \pgfsyssoftpath@bigbuffer@items=\count142 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code. 
tex File: pgfsysprotocol.code.tex 2006/10/16 (rcs-revision 1.4) )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex Package: pgfcore 2010/04/11 v3.0.1a (rcs-revision 1.7) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex \pgfmath@dimen=\dimen152 \pgfmath@count=\count143 \pgfmath@box=\box31 \pgfmath@toks=\toks29 \pgfmath@stack@operand=\toks30 \pgfmath@stack@operation=\toks31 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code .tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonomet ric.code.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.cod e.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison .code.tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code. tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code .tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code. 
tex) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerari thmetics.code.tex))) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex \c@pgfmathroundto@lastzeros=\count144 )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.te x File: pgfcorepoints.code.tex 2013/10/07 (rcs-revision 1.27) \pgf@picminx=\dimen153 \pgf@picmaxx=\dimen154 \pgf@picminy=\dimen155 \pgf@picmaxy=\dimen156 \pgf@pathminx=\dimen157 \pgf@pathmaxx=\dimen158 \pgf@pathminy=\dimen159 \pgf@pathmaxy=\dimen160 \pgf@xx=\dimen161 \pgf@xy=\dimen162 \pgf@yx=\dimen163 \pgf@yy=\dimen164 \pgf@zx=\dimen165 \pgf@zy=\dimen166 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct. code.tex File: pgfcorepathconstruct.code.tex 2013/10/07 (rcs-revision 1.29) \pgf@path@lastx=\dimen167 \pgf@path@lasty=\dimen168 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code .tex File: pgfcorepathusage.code.tex 2014/11/02 (rcs-revision 1.24) \pgf@shorten@end@additional=\dimen169 \pgf@shorten@start@additional=\dimen170 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.te x File: pgfcorescopes.code.tex 2015/05/08 (rcs-revision 1.46) \pgfpic=\box32 \pgf@hbox=\box33 \pgf@layerbox@main=\box34 \pgf@picture@serial@count=\count145 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.c ode.tex File: pgfcoregraphicstate.code.tex 2014/11/02 (rcs-revision 1.12) \pgflinewidth=\dimen171 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformation s.code.tex File: pgfcoretransformations.code.tex 2015/08/07 (rcs-revision 1.20) \pgf@pt@x=\dimen172 \pgf@pt@y=\dimen173 \pgf@pt@temp=\dimen174 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex File: pgfcorequick.code.tex 2008/10/09 (rcs-revision 1.3) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.t ex File: pgfcoreobjects.code.tex 
2006/10/11 (rcs-revision 1.2) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing .code.tex File: pgfcorepathprocessing.code.tex 2013/09/09 (rcs-revision 1.9) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.te x File: pgfcorearrows.code.tex 2015/05/14 (rcs-revision 1.43) \pgfarrowsep=\dimen175 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex File: pgfcoreshade.code.tex 2013/07/15 (rcs-revision 1.15) \pgf@max=\dimen176 \pgf@sys@shading@range@num=\count146 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex File: pgfcoreimage.code.tex 2013/07/15 (rcs-revision 1.18) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code. tex File: pgfcoreexternal.code.tex 2014/07/09 (rcs-revision 1.21) \pgfexternal@startupbox=\box35 )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.te x File: pgfcorelayers.code.tex 2013/07/18 (rcs-revision 1.7) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.c ode.tex File: pgfcoretransparency.code.tex 2013/09/30 (rcs-revision 1.5) ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code. 
tex File: pgfcorepatterns.code.tex 2013/11/07 (rcs-revision 1.5) ))) (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex File: pgfmoduleshapes.code.tex 2014/03/21 (rcs-revision 1.35) \pgfnodeparttextbox=\box36 ) (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex File: pgfmoduleplot.code.tex 2015/08/03 (rcs-revision 1.13) ) (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65 .sty Package: pgfcomp-version-0-65 2007/07/03 v3.0.1a (rcs-revision 1.7) \pgf@nodesepstart=\dimen177 \pgf@nodesepend=\dimen178 ) (/usr/share/texlive/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18 .sty Package: pgfcomp-version-1-18 2007/07/23 v3.0.1a (rcs-revision 1.1) )) (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (/usr/share/texlive/texmf-dist/tex/latex/pgf/math/pgfmath.sty (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex Package: pgffor 2013/12/13 v3.0.1a (rcs-revision 1.25) (/usr/share/texlive/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex) \pgffor@iter=\dimen179 \pgffor@skip=\dimen180 \pgffor@stack=\toks32 \pgffor@toks=\toks33 )) (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex Package: tikz 2015/08/07 v3.0.1a (rcs-revision 1.151) (/usr/share/texlive/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers .code.tex File: pgflibraryplothandlers.code.tex 2013/08/31 v3.0.1a (rcs-revision 1.20) \pgf@plot@mark@count=\count147 \pgfplotmarksize=\dimen181 ) \tikz@lastx=\dimen182 \tikz@lasty=\dimen183 \tikz@lastxsaved=\dimen184 \tikz@lastysaved=\dimen185 \tikzleveldistance=\dimen186 \tikzsiblingdistance=\dimen187 \tikz@figbox=\box37 \tikz@figbox@bg=\box38 \tikz@tempbox=\box39 \tikz@tempbox@bg=\box40 
\tikztreelevel=\count148 \tikznumberofchildren=\count149 \tikznumberofcurrentchild=\count150 \tikz@fig@count=\count151 (/usr/share/texlive/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex File: pgfmodulematrix.code.tex 2013/09/17 (rcs-revision 1.8) \pgfmatrixcurrentrow=\count152 \pgfmatrixcurrentcolumn=\count153 \pgf@matrix@numberofcolumns=\count154 ) \tikz@expandcount=\count155 (/usr/share/texlive/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tik zlibrarytopaths.code.tex File: tikzlibrarytopaths.code.tex 2008/06/17 v3.0.1a (rcs-revision 1.2) ))) (/usr/share/texlive/texmf-dist/tex/latex/titlesec/titlesec.sty Package: titlesec 2016/03/21 v2.10.2 Sectioning titles \ttl@box=\box41 \beforetitleunit=\skip69 \aftertitleunit=\skip70 \ttl@plus=\dimen188 \ttl@minus=\dimen189 \ttl@toksa=\toks34 \titlewidth=\dimen190 \titlewidthlast=\dimen191 \titlewidthfirst=\dimen192 ) (/usr/share/texlive/texmf-dist/tex/latex/titlesec/ttlkeys.def File: ttlkeys.def 2016/03/15 \c@ttlp@side=\count156 \ttlp@side=\count157 ) \c@myparts=\count158 (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty Package: amsmath 2017/09/02 v2.17a AMS math features \@mathmargin=\skip71 For additional information on amsmath, use the `?' option. (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty Package: amstext 2000/06/29 v2.01 AMS text (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty File: amsgen.sty 1999/11/30 v2.0 generic functions \@emptytoks=\toks35 \ex@=\dimen193 )) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty Package: amsbsy 1999/11/29 v1.2d Bold Symbols \pmbraise@=\dimen194 ) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty Package: amsopn 2016/03/08 v2.02 operator names ) \inf@bad=\count159 LaTeX Info: Redefining \frac on input line 213. \uproot@=\count160 \leftroot@=\count161 LaTeX Info: Redefining \overline on input line 375. \classnum@=\count162 \DOTSCASE@=\count163 LaTeX Info: Redefining \ldots on input line 472. 
LaTeX Info: Redefining \dots on input line 475. LaTeX Info: Redefining \cdots on input line 596. \Mathstrutbox@=\box42 \strutbox@=\box43 \big@size=\dimen195 LaTeX Font Info: Redeclaring font encoding OML on input line 712. LaTeX Font Info: Redeclaring font encoding OMS on input line 713. \macc@depth=\count164 \c@MaxMatrixCols=\count165 \dotsspace@=\muskip11 \c@parentequation=\count166 \dspbrk@lvl=\count167 \tag@help=\toks36 \row@=\count168 \column@=\count169 \maxfields@=\count170 \andhelp@=\toks37 \eqnshift@=\dimen196 \alignsep@=\dimen197 \tagshift@=\dimen198 \tagwidth@=\dimen199 \totwidth@=\dimen256 \lineht@=\dimen257 \@envbody=\toks38 \multlinegap=\skip72 \multlinetaggap=\skip73 \mathdisplay@stack=\toks39 LaTeX Info: Redefining \[ on input line 2817. LaTeX Info: Redefining \] on input line 2818. ) (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amsfonts.sty Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support \symAMSa=\mathgroup6 \symAMSb=\mathgroup7 LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' (Font) U/euf/m/n --> U/euf/b/n on input line 106. LaTeX Font Info: Redeclaring math symbol \square on input line 141. ) (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/amssymb.sty Package: amssymb 2013/01/14 v3.01 AMS font symbols LaTeX Font Info: Redeclaring math symbol \blacksquare on input line 48. LaTeX Font Info: Redeclaring math symbol \vDash on input line 60. LaTeX Font Info: Redeclaring math symbol \leftleftarrows on input line 63. LaTeX Font Info: Redeclaring math symbol \rightrightarrows on input line 64. LaTeX Font Info: Redeclaring math symbol \leqslant on input line 101. LaTeX Font Info: Redeclaring math symbol \geqslant on input line 108. LaTeX Font Info: Redeclaring math symbol \blacktriangleright on input line 1 20. LaTeX Font Info: Redeclaring math symbol \blacktriangleleft on input line 12 1. LaTeX Font Info: Redeclaring math symbol \complement on input line 165. 
LaTeX Font Info: Redeclaring math symbol \intercal on input line 166. LaTeX Font Info: Redeclaring math symbol \nleqslant on input line 181. LaTeX Font Info: Redeclaring math symbol \ngeqslant on input line 182. LaTeX Font Info: Redeclaring math symbol \varsubsetneq on input line 203. LaTeX Font Info: Redeclaring math symbol \subsetneqq on input line 207. LaTeX Font Info: Redeclaring math symbol \nparallel on input line 215. LaTeX Font Info: Redeclaring math symbol \nvDash on input line 221. LaTeX Font Info: Redeclaring math symbol \nexists on input line 235. LaTeX Font Info: Redeclaring math symbol \smallsetminus on input line 251. LaTeX Font Info: Redeclaring math symbol \curvearrowleft on input line 257. LaTeX Font Info: Redeclaring math symbol \curvearrowright on input line 258. LaTeX Font Info: Redeclaring math symbol \varkappa on input line 260. LaTeX Font Info: Redeclaring math symbol \hslash on input line 262. ) (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mathtools.sty Package: mathtools 2018/01/08 v1.21 mathematical typesetting tools (/usr/share/texlive/texmf-dist/tex/latex/mathtools/mhsetup.sty Package: mhsetup 2017/03/31 v1.3 programming setup (MH) ) LaTeX Info: Thecontrolsequence`\('isalreadyrobust on input line 129. LaTeX Info: Thecontrolsequence`\)'isalreadyrobust on input line 129. LaTeX Info: Thecontrolsequence`\['isalreadyrobust on input line 129. LaTeX Info: Thecontrolsequence`\]'isalreadyrobust on input line 129. 
\g_MT_multlinerow_int=\count171 \l_MT_multwidth_dim=\dimen258 \origjot=\skip74 \l_MT_shortvdotswithinadjustabove_dim=\dimen259 \l_MT_shortvdotswithinadjustbelow_dim=\dimen260 \l_MT_above_intertext_sep=\dimen261 \l_MT_below_intertext_sep=\dimen262 \l_MT_above_shortintertext_sep=\dimen263 \l_MT_below_shortintertext_sep=\dimen264 )) (./head/settings_custom.tex (/usr/share/texlive/texmf-dist/tex/latex/algorithm2e/algorithm2e.sty Package: algorithm2e 2017/07/18 v5.2 algorithms environments \c@AlgoLine=\count172 \algocf@hangindent=\skip75 (/usr/share/texlive/texmf-dist/tex/latex/ifoddpage/ifoddpage.sty Package: ifoddpage 2016/04/23 v1.1 Conditionals for odd/even page detection \c@checkoddpage=\count173 ) (/usr/share/texlive/texmf-dist/tex/latex/tools/xspace.sty Package: xspace 2014/10/28 v1.13 Space after command names (DPC,MH) ) (/usr/share/texlive/texmf-dist/tex/latex/relsize/relsize.sty Package: relsize 2013/03/29 ver 4.1 ) \skiptotal=\skip76 \skiplinenumber=\skip77 \skiprule=\skip78 \skiphlne=\skip79 \skiptext=\skip80 \skiplength=\skip81 \algomargin=\skip82 \skipalgocfslide=\skip83 \algowidth=\dimen265 \inoutsize=\dimen266 \inoutindent=\dimen267 \interspacetitleruled=\dimen268 \interspacealgoruled=\dimen269 \interspacetitleboxruled=\dimen270 \algocf@ruledwidth=\skip84 \algocf@inoutbox=\box44 \algocf@inputbox=\box45 \AlCapSkip=\skip85 \AlCapHSkip=\skip86 \algoskipindent=\skip87 \algocf@nlbox=\box46 \algocf@hangingbox=\box47 \algocf@untilbox=\box48 \algocf@skipuntil=\skip88 \algocf@capbox=\box49 \algocf@lcaptionbox=\skip89 \algoheightruledefault=\skip90 \algoheightrule=\skip91 \algotitleheightruledefault=\skip92 \algotitleheightrule=\skip93 \c@algocfline=\count174 \c@algocfproc=\count175 \c@algocf=\count176 \algocf@algoframe=\box50 \algocf@algobox=\box51 ) (/usr/share/texlive/texmf-dist/tex/latex/float/float.sty Package: float 2001/11/08 v1.3d Float enhancements (AL) \c@float@type=\count177 \float@exts=\toks40 \float@box=\box52 \@float@everytoks=\toks41 
\@floatcapt=\box53 ) (/usr/share/texlive/texmf-dist/tex/latex/hyperref/xr-hyper.sty Package: xr-hyper 2000/03/22 v6.00beta4 eXternal References (DPC) Package xr-hyper Warning: Load package `hyperref' after `xr-hyper'. ) (/usr/share/texlive/texmf-dist/tex/latex/makecell/makecell.sty Package: makecell 2009/08/03 V0.1e Managing of Tab Column Heads and Cells (/usr/share/texlive/texmf-dist/tex/latex/tools/array.sty Package: array 2016/10/06 v2.4d Tabular extension package (FMi) \col@sep=\dimen271 \extrarowheight=\dimen272 \NC@list=\toks42 \extratabsurround=\skip94 \backup@length=\skip95 ) \rotheadsize=\dimen273 \c@nlinenum=\count178 \TeXr@lab=\toks43 )) (./my_thesis.aux (./head/dedication.aux) (./head/acknowledgements.aux) (./head/preface.aux) (./head/abstracts.aux) (./main/ch_introduction.aux) (./main/ch_group_projects.aux) (./main/ch_encode_peaks.aux) (./main/ch_smile-seq.aux) (./main/ch_atac-seq.aux LaTeX Warning: Label `encode_peaks_algo_ndr_extend' multiply defined. ) (./tail/appendix.aux LaTeX Warning: Label `suppl_emseq_sp1_10class' multiply defined. ) (./tail/biblio.aux) (./tail/cv.aux)) \openout1 = `my_thesis.aux'. LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for FML/futm/m/it on input line 18. LaTeX Font Info: Try loading font information for FML+futm on input line 18. 
(/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutm.fd File: fmlfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futm. ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for FMS/futm/m/n on input line 18. LaTeX Font Info: Try loading font information for FMS+futm on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmsfutm.fd File: fmsfutm.fd 2004/10/30 Fontinst v1.926 font definitions for FMS/futm. ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for FMX/futm/m/n on input line 18. LaTeX Font Info: Try loading font information for FMX+futm on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmxfutm.fd File: fmxfutm.fd futm-extension ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 18. LaTeX Font Info: Try loading font information for TS1+cmr on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd File: ts1cmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions ) LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 18. LaTeX Font Info: ... okay on input line 18. LaTeX Font Info: Try loading font information for T1+futs on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/fourier/t1futs.fd File: t1futs.fd 2004/03/02 Fontinst v1.926 font definitions for T1/futs. ) LaTeX Info: Redefining \degres on input line 18. LaTeX Info: Redefining \dots on input line 18. LaTeX Info: Redefining \up on input line 18. (/usr/share/texlive/texmf-dist/tex/context/base/mkii/supp-pdf.mkii [Loading MPS to PDF converter (version 2006.09.02).] 
\scratchcounter=\count179 \scratchdimen=\dimen274 \scratchbox=\box54 \nofMPsegments=\count180 \nofMParguments=\count181 \everyMPshowfont=\toks44 \MPscratchCnt=\count182 \MPscratchDim=\dimen275 \MPnumerator=\count183 \makeMPintoPDFobject=\count184 \everyMPtoPDFconversion=\toks45 ) (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty Package: epstopdf-base 2016/05/15 v2.6 Base part for package epstopdf (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/grfext.sty Package: grfext 2016/05/16 v1.2 Manage graphics extensions (HO) ) Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4 38. Package grfext Info: Graphics extension search list: (grfext) [.pdf,.png,.jpg,.mps,.jpeg,.jbig2,.jb2,.PDF,.PNG,.JPG,.JPE G,.JBIG2,.JB2,.eps] (grfext) \AppendGraphicsExtensions on input line 456. (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv e )) Package caption Info: Begin \AtBeginDocument code. Package caption Info: subfig package v1.3 is loaded. Package caption Info: float package is loaded. Package caption Info: hyperref package is loaded. Package caption Info: listings package is loaded. Package caption Info: End \AtBeginDocument code. LaTeX Info: Redefining \microtypecontext on input line 18. Package microtype Info: Generating PDF output. Package microtype Info: Character protrusion enabled (level 2). Package microtype Info: Using default protrusion set `alltext'. Package microtype Info: Automatic font expansion enabled (level 2), (microtype) stretch: 20, shrink: 20, step: 1, non-selected. Package microtype Info: Using default expansion set `basictext'. Package microtype Info: No adjustment of tracking. Package microtype Info: No adjustment of interword spacing. Package microtype Info: No adjustment of character kerning. Package microtype Info: Loading generic protrusion settings for font family (microtype) `futs' (encoding: T1). 
(microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. \c@lstlisting=\count185 \AtBeginShipoutBox=\box55 Package hyperref Info: Link coloring ON on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/hyperref/nameref.sty Package: nameref 2016/05/21 v2.44 Cross-referencing by name of section (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/gettitlestring.sty Package: gettitlestring 2016/05/16 v1.5 Cleanup title references (HO) ) \c@section@level=\count186 ) LaTeX Info: Redefining \ref on input line 18. LaTeX Info: Redefining \pageref on input line 18. LaTeX Info: Redefining \nameref on input line 18. (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/pdflscape.sty Package: pdflscape 2016/05/14 v0.11 Display of landscape pages in PDF (HO) (/usr/share/texlive/texmf-dist/tex/latex/graphics/lscape.sty Package: lscape 2000/10/22 v3.01 Landscape Pages (DPC) ) Package pdflscape Info: Auto-detected driver: pdftex on input line 81. ) ABD: EveryShipout initializing macros (./head/titlepage.tex LaTeX Font Info: Try loading font information for T1+lmss on input line 5. (/usr/share/texmf/tex/latex/lm/t1lmss.fd File: t1lmss.fd 2009/10/30 v1.6 Font defs for Latin Modern ) Package microtype Info: Loading generic protrusion settings for font family (microtype) `lmss' (encoding: T1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. LaTeX Font Info: Try loading font information for FML+futmi on input line 14 . (/usr/share/texlive/texmf-dist/tex/latex/fourier/fmlfutmi.fd File: fmlfutmi.fd 2004/10/30 Fontinst v1.926 font definitions for FML/futmi. ) LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 10.07397pt on input line 14. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 7.63599pt on input line 14. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 5.51999pt on input line 14. 
LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 10.07397pt on input line 14. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 7.63599pt on input line 14. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 5.51999pt on input line 14. LaTeX Font Info: Try loading font information for U+msa on input line 14. (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsa.fd File: umsa.fd 2013/01/14 v3.01 AMS symbols A ) (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msa.cfg File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS) ) LaTeX Font Info: Try loading font information for U+msb on input line 14. (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsb.fd File: umsb.fd 2013/01/14 v3.01 AMS symbols B ) (/usr/share/texlive/texmf-dist/tex/latex/microtype/mt-msb.cfg File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS) ) File: images/epfl.pdf Graphic file (type pdf) Package pdftex.def Info: images/epfl.pdf used on input line 15. (pdftex.def) Requested size: 113.81102pt x 49.4394pt. Overfull \hbox (23.99998pt too wide) in paragraph at lines 14--41 [][] [] [1 {/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} <./images/epfl.pdf>]) \openout2 = `head/dedication.aux'. (./head/dedication.tex [2 ]) [3] \openout2 = `head/acknowledgements.aux'. (./head/acknowledgements.tex [0 ] LaTeX Font Info: Font shape `T1/futs/bx/n' in size <10.95> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 1. LaTeX Font Info: Font shape `T1/futs/bx/n' in size <24.88> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 1. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 22.88956pt on input line 1. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 15.89755pt on input line 1. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 11.03998pt on input line 1. 
LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 22.88956pt on input line 1. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 15.89755pt on input line 1. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 11.03998pt on input line 1. )pdfTeX warning (ext4): destination with the same identifier (name{page.i}) has been already used, duplicate ignored \relax l.25 \include{head/acknowledgements} [1] \openout2 = `head/preface.aux'. (./head/preface.texpdfTeX warning (ext4): destination with the same identifier (name{page.ii}) has been already used, duplicate ignored \relax l.1 \cleardoublepage [2 ])pdfTeX warning (ext4): destination with the same identifier (name{page.iii}) has been already used, duplicate ignored \relax l.26 \include{head/preface} [3 ] \openout2 = `head/abstracts.aux'. (./head/abstracts.tex [4 ] Package babel Info: Redefining german shorthand "f (babel) in language on input line 18. Package babel Info: Redefining german shorthand "| (babel) in language on input line 18. Package babel Info: Redefining german shorthand "~ (babel) in language on input line 18. Package babel Info: Redefining german shorthand "f (babel) in language on input line 18. Package babel Info: Redefining german shorthand "| (babel) in language on input line 18. Package babel Info: Redefining german shorthand "~ (babel) in language on input line 18. [5 ] Package babel Info: Redefining german shorthand "f (babel) in language on input line 18. Package babel Info: Redefining german shorthand "| (babel) in language on input line 18. Package babel Info: Redefining german shorthand "~ (babel) in language on input line 18. Package babel Info: Redefining german shorthand "f (babel) in language on input line 18. Package babel Info: Redefining german shorthand "| (babel) in language on input line 18. Package babel Info: Redefining german shorthand "~ (babel) in language on input line 18. 
[6 ] [7 ] [8 ]) [9 ] [10 ] (./my_thesis.toc [11 ] [12]) \tf@toc=\write4 \openout4 = `my_thesis.toc'. [13] [14 ] \openout2 = `main/ch_introduction.aux'. (./main/ch_introduction.tex Chapter 1. LaTeX Font Info: Font shape `T1/futs/bx/n' in size <14.4> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 14. LaTeX Font Info: Font shape `T1/futs/bx/n' in size <12> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 26. File: images/ch_introduction/chromatin.png Graphic file (type png) Package pdftex.def Info: images/ch_introduction/chromatin.png used on input li ne 31. (pdftex.def) Requested size: 209.41869pt x 285.22055pt. Underfull \vbox (badness 10000) has occurred while \output is active [] [1 ] [2 <./images/ch_introduction/chromatin.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] [3] + +File: images/ch_introduction/nucleosome_positioning.png Graphic file (type png) + + +Package pdftex.def Info: images/ch_introduction/nucleosome_positioning.png use +d on input line 69. +(pdftex.def) Requested size: 290.89583pt x 230.46324pt. + Underfull \vbox (badness 10000) has occurred while \output is active [] [4] +[5 <./images/ch_introduction/nucleosome_positioning.png>] +Underfull \vbox (badness 10000) has occurred while \output is active [] + + [6] Underfull \vbox (badness 10000) has occurred while \output is active [] - [5]) -[6] + [7]) +[8] \openout2 = `main/ch_group_projects.aux'. (./main/ch_group_projects.tex Chapter 2. - + File: images/ch_group_projects/mga_figure1.jpeg Graphic file (type jpg) Package pdftex.def Info: images/ch_group_projects/mga_figure1.jpeg used on inp ut line 27. (pdftex.def) Requested size: 400.23181pt x 134.13329pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] - [7 + [9 ] -[8 <./images/ch_group_projects/mga_figure1.jpeg>] [9] - +[10 <./images/ch_group_projects/mga_figure1.jpeg>] [11] + File: images/ch_group_projects/epd_figure1.jpeg Graphic file (type jpg) Package pdftex.def Info: images/ch_group_projects/epd_figure1.jpeg used on inp ut line 72. (pdftex.def) Requested size: 215.12772pt x 174.80144pt. -LaTeX Warning: Reference `L' on page 10 undefined on input line 73. +LaTeX Warning: Reference `L' on page 12 undefined on input line 73. -LaTeX Warning: Reference `L' on page 10 undefined on input line 73. +LaTeX Warning: Reference `L' on page 12 undefined on input line 73. Underfull \vbox (badness 10000) has occurred while \output is active [] - [10 + [12 <./images/ch_group_projects/epd_figure1.jpeg>] - + File: images/ch_group_projects/epd_motifs.png Graphic file (type png) Package pdftex.def Info: images/ch_group_projects/epd_motifs.png used on input line 124. (pdftex.def) Requested size: 346.89868pt x 173.44933pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [11] -[12] + [13] +[14] Underfull \vbox (badness 10000) has occurred while \output is active [] - [13 <./images/ch_group_projects/epd_motifs.png (PNG copy)>] [14] -] [16] + File: images/ch_group_projects/pwmscan_flowchart.png Graphic file (type png) Package pdftex.def Info: images/ch_group_projects/pwmscan_flowchart.png used o n input line 178. (pdftex.def) Requested size: 279.21945pt x 370.52591pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [15 + [17 ] -[16 <./images/ch_group_projects/pwmscan_flowchart.png>] +[18 <./images/ch_group_projects/pwmscan_flowchart.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [17] - File: images/ch_group_projects/pwmscan_figure_s1.png Graphic file (type png) Package pdftex.def Info: images/ch_group_projects/pwmscan_figure_s1.png used o n input line 222. 
(pdftex.def) Requested size: 269.60248pt x 153.77177pt. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 8.27998pt on input line 235. LaTeX Font Info: Font shape `FMX/futm/m/n' will be (Font) scaled to size 6.99199pt on input line 235. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 8.27998pt on input line 235. LaTeX Font Info: Font shape `U/futm/m/n' will be (Font) scaled to size 6.99199pt on input line 235. Underfull \vbox (badness 10000) has occurred while \output is active [] - [18 <./images/ch_group_projects/pwmscan_figure_s1.png>] + [20 <./images/ch_group_projects/pwmscan_figure_s1.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [19] -[20] - + [21] +[22] + File: images/ch_group_projects/spark_figure1.pdf Graphic file (type pdf) Package pdftex.def Info: images/ch_group_projects/spark_figure1.pdf used on in put line 311. (pdftex.def) Requested size: 404.70483pt x 231.2599pt. -LaTeX Warning: Reference `fig_s07' on page 21 undefined on input line 313. +LaTeX Warning: Reference `fig_s07' on page 23 undefined on input line 313. -LaTeX Warning: Reference `fig_s07' on page 21 undefined on input line 313. +LaTeX Warning: Reference `fig_s07' on page 23 undefined on input line 313. Underfull \vbox (badness 10000) has occurred while \output is active [] - [21 + [23 ] -[22 <./images/ch_group_projects/spark_figure1.pdf>] -] + File: images/ch_group_projects/spark_supplemental_figure2.pdf Graphic file (typ e pdf) Package pdftex.def Info: images/ch_group_projects/spark_supplemental_figure2.pd f used on input line 326. (pdftex.def) Requested size: 462.5198pt x 202.3524pt. - File: images/ch_group_projects/spark_supplemental_figure4.pdf Graphic file (typ e pdf) Package pdftex.def Info: images/ch_group_projects/spark_supplemental_figure4.pd f used on input line 334. (pdftex.def) Requested size: 289.07487pt x 173.44492pt. 
- File: images/ch_group_projects/spark_supplemental_figure5.pdf Graphic file (typ e pdf) Package pdftex.def Info: images/ch_group_projects/spark_supplemental_figure5.pd f used on input line 342. (pdftex.def) Requested size: 289.07487pt x 173.44492pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [23 <./images/ch_group_projects/spark_supplemental_figure2.pdf>] [24 <./images + [25 <./images/ch_group_projects/spark_supplemental_figure2.pdf>] [26 <./images /ch_group_projects/spark_supplemental_figure4.pdf> <./images/ch_group_projects/ -spark_supplemental_figure5.pdf>]) [25] +spark_supplemental_figure5.pdf>]) [27] \openout2 = `main/ch_encode_peaks.aux'. - (./main/ch_encode_peaks.tex [26 + (./main/ch_encode_peaks.tex [28 ] Chapter 3. - File: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/peaklist_peaknumber_GM12878.png used on input line 26. (pdftex.def) Requested size: 520.34802pt x 173.44933pt. Overfull \hbox (102.66156pt too wide) in paragraph at lines 26--27 [] [] - File: images/ch_encode_peaks/peaklist_proportions_GM12878.png Graphic file (typ e png) Package pdftex.def Info: images/ch_encode_peaks/peaklist_proportions_GM12878.pn g used on input line 35. (pdftex.def) Requested size: 520.34802pt x 173.44933pt. 
Overfull \hbox (102.66156pt too wide) in paragraph at lines 35--36 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [27 + [29 ] -[28 <./images/ch_encode_peaks/peaklist_peaknumber_GM12878.png (PNG copy)> <./im +[30 <./images/ch_encode_peaks/peaklist_peaknumber_GM12878.png (PNG copy)> <./im ages/ch_encode_peaks/peaklist_proportions_GM12878.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [29] + [31] Underfull \vbox (badness 10000) has occurred while \output is active [] - [30] - + [32] + File: images/ch_encode_peaks/MNase_profiles.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/MNase_profiles.png used on inp ut line 81. (pdftex.def) Requested size: 377.15814pt x 259.5691pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [31] -[32 <./images/ch_encode_peaks/MNase_profiles.png>] -] + File: images/ch_encode_peaks/colocalization_ctcf.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/colocalization_ctcf.png used o n input line 108. (pdftex.def) Requested size: 403.20538pt x 320.54678pt. - File: images/ch_encode_peaks/CTCF_ndr_length_rad212.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/CTCF_ndr_length_rad212.png use d on input line 116. (pdftex.def) Requested size: 404.70483pt x 231.2599pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [33] -[34 <./images/ch_encode_peaks/colocalization_ctcf.png>] [35 <./images/ch_encode + [35] +[36 <./images/ch_encode_peaks/colocalization_ctcf.png>] [37 <./images/ch_encode _peaks/CTCF_ndr_length_rad212.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [36] - + [38] + File: images/ch_encode_peaks/TF_associations.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/TF_associations.png used on in put line 145. (pdftex.def) Requested size: 240.13863pt x 152.59023pt. 
- File: images/ch_encode_peaks/ctcf_motif_association.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ctcf_motif_association.png use d on input line 153. (pdftex.def) Requested size: 433.61232pt x 339.18118pt. Overfull \hbox (15.92586pt too wide) in paragraph at lines 153--154 [] [] LaTeX Warning: Float too large for page by 31.83305pt on input line 209. -[37 <./images/ch_encode_peaks/TF_associations.png>] [38 <./images/ch_encode_pea -ks/ctcf_motif_association.png>] [39] +[39 <./images/ch_encode_peaks/TF_associations.png>] [40 <./images/ch_encode_pea +ks/ctcf_motif_association.png>] [41] Underfull \vbox (badness 10000) has occurred while \output is active [] - [40] + [42] Underfull \vbox (badness 10000) has occurred while \output is active [] - [41] - + [43] + File: images/ch_encode_peaks/ebf1_haib_1.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_1.png used on input line 234. (pdftex.def) Requested size: 260.16739pt x 346.88986pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [42] -[43 <./images/ch_encode_peaks/ebf1_haib_1.png (PNG copy)>] + [44] +[45 <./images/ch_encode_peaks/ebf1_haib_1.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [44] + [46] LaTeX Warning: Reference `https://ccg.epfl.ch/mga/hg19/phastcons/phastcons.html -' on page 45 undefined on input line 280. +' on page 47 undefined on input line 280. Underfull \vbox (badness 10000) has occurred while \output is active [] - [45] + [47] Underfull \vbox (badness 10000) has occurred while \output is active [] - [46] + [48] Underfull \vbox (badness 10000) has occurred while \output is active [] - [47] + [49] LaTeX Font Info: Font shape `T1/futs/bx/n' in size <8> not available (Font) Font shape `T1/futs/b/n' tried instead on input line 411. LaTeX Font Info: Try loading font information for T1+lmtt on input line 411. 
(/usr/share/texmf/tex/latex/lm/t1lmtt.fd File: t1lmtt.fd 2009/10/30 v1.6 Font defs for Latin Modern ) Package microtype Info: Loading generic protrusion settings for font family (microtype) `lmtt' (encoding: T1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. Underfull \vbox (badness 10000) has occurred while \output is active [] - [48] + [50] Underfull \vbox (badness 10000) has occurred while \output is active [] - [49] + [51] Overfull \hbox (9.9085pt too wide) in paragraph at lines 434--435 \T1/futs/m/n/10.95 (-20) ences were the cor-rected EBF1 peaks (wgEn-codeAwgTf-b -sHaibGm12878Ebf1sc137065Pcr1xUniPk [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [50]) + [52]) Underfull \vbox (badness 10000) has occurred while \output is active [] - [51] -[52] + [53] +[54] \openout2 = `main/ch_smile-seq.aux'. (./main/ch_smile-seq.tex Chapter 4. - + File: images/ch_smile-seq/figure1.jpg Graphic file (type jpg) Package pdftex.def Info: images/ch_smile-seq/figure1.jpg used on input line 23 . (pdftex.def) Requested size: 232.36755pt x 301.62613pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [53 + [55 ] -[54 <./images/ch_smile-seq/figure1.jpg>] - +[56 <./images/ch_smile-seq/figure1.jpg>] + File: images/ch_smile-seq/figure_hmm.png Graphic file (type png) Package pdftex.def Info: images/ch_smile-seq/figure_hmm.png used on input line 41. (pdftex.def) Requested size: 416.22516pt x 215.09944pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] - [55 <./images/ch_smile-seq/figure_hmm.png>] + [57 <./images/ch_smile-seq/figure_hmm.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [56] + [58] Underfull \vbox (badness 10000) has occurred while \output is active [] - [57] - + [59] + File: images/ch_smile-seq/figure2b_3a.png Graphic file (type png) Package pdftex.def Info: images/ch_smile-seq/figure2b_3a.png used on input lin e 119. (pdftex.def) Requested size: 398.92334pt x 166.8203pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [58] -[59 <./images/ch_smile-seq/figure2b_3a.png>]) [60] + [60] +[61 <./images/ch_smile-seq/figure2b_3a.png>]) [62] \openout2 = `main/ch_atac-seq.aux'. (./main/ch_atac-seq.tex Chapter 5. Package hyperref Info: bookmark level for unknown toc defaults to 0 on input li ne 5. - + File: images/ch_atac-seq/ATAC-seq2.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/ATAC-seq2.png used on input line 2 0. (pdftex.def) Requested size: 234.49948pt x 321.44873pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [61 + [63 ] -[62 <./images/ch_atac-seq/ATAC-seq2.png>] +[64 <./images/ch_atac-seq/ATAC-seq2.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [63] + [65] Underfull \vbox (badness 10000) has occurred while \output is active [] - [64] + [66] Underfull \hbox (badness 10000) in paragraph at lines 81--82 \T1/futs/m/n/10.95 (-12) com / 10x . ^^\les / samples / cell-[]atac / 1 . 1 . 0 / atac _ v1 _ pbmc _ 5k / atac _ v1 _ pbmc _ 5k _ possorted _ bam . [] - + File: images/ch_atac-seq/em.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/em.png used on input line 105. (pdftex.def) Requested size: 295.41382pt x 193.37625pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] - [65] -[66 <./images/ch_atac-seq/em.png>] + [67] +[68 <./images/ch_atac-seq/em.png>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [67] + [69] Underfull \vbox (badness 10000) has occurred while \output is active [] - [68] + [70] Underfull \vbox (badness 10000) has occurred while \output is active [] - [69] - + [71] + File: images/ch_atac-seq/fragment_lengths.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/fragment_lengths.png used on input line 257. (pdftex.def) Requested size: 433.62335pt x 130.087pt. Overfull \hbox (15.93689pt too wide) in paragraph at lines 257--258 [] [] - File: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_10e-6_aggregations.png used on input line 266. (pdftex.def) Requested size: 346.88986pt x 260.16739pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [70] -[71 <./images/ch_atac-seq/fragment_lengths.png (PNG copy)>] [72 <./images/ch_at + [72] +[73 <./images/ch_atac-seq/fragment_lengths.png (PNG copy)>] [74 <./images/ch_at ac-seq/ctcf_motifs_10e-6_aggregations.png (PNG copy)>] - File: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png Graphic file (type png ) Package pdftex.def Info: images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png us ed on input line 286. (pdftex.def) Requested size: 390.26102pt x 195.1305pt. - [73 <./images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png (PNG copy)>] + [75 <./images/ch_atac-seq/ctcf_sp1_myc_ebf1_footprint.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [74] - File: images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png Graphic fi le (type png) Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_auc _roc.png used on input line 325. (pdftex.def) Requested size: 346.88986pt x 173.44492pt. 
- + File: images/ch_atac-seq/sp1_motifs_7class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_7class.png used on inpu t line 333. (pdftex.def) Requested size: 455.30783pt x 303.53854pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 333--334 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [75] -[76 <./images/ch_atac-seq/simulated_sequences_2class_flip_auc_roc.png (PNG copy -)>] [77 <./images/ch_atac-seq/sp1_motifs_7class.png (PNG copy)>] -] [79 <./images/ch_atac-seq/sp1_motifs_7class.png (PNG copy)>] + File: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png used on input line 356. (pdftex.def) Requested size: 390.26102pt x 173.44933pt. - File: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png Graphic file (type p ng) Package pdftex.def Info: images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png used on input line 364. (pdftex.def) Requested size: 455.30783pt x 202.35902pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 364--365 [] [] -[78 <./images/ch_atac-seq/ctcf_motifs_6class_noshift_flip.png (PNG copy)>] -[79 <./images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png (PNG copy)>] -] +[81 <./images/ch_atac-seq/ctcf_motifs_6class_shift_flip.png (PNG copy)>] + File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png Graphic fil e (type png) Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas s_2.png used on input line 386. (pdftex.def) Requested size: 390.26102pt x 260.17401pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] - [80] -[81 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png (PNG copy) + [82] +[83 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class_2.png (PNG copy) >] -LaTeX Warning: Reference `berest_quantification_2018' on page 82 undefined on i +LaTeX Warning: Reference `berest_quantification_2018' on page 84 undefined on i nput line 402. - + File: images/ch_atac-seq/data_classCTCF_8class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/data_classCTCF_8class.png used on input line 411. (pdftex.def) Requested size: 433.62335pt x 216.81166pt. Overfull \hbox (15.93689pt too wide) in paragraph at lines 411--412 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [82] + [84] Underfull \vbox (badness 10000) has occurred while \output is active [] - [83 <./images/ch_atac-seq/data_classCTCF_8class.png (PNG copy)>] + [85 <./images/ch_atac-seq/data_classCTCF_8class.png (PNG copy)>] Underfull \vbox (badness 10000) has occurred while \output is active [] - [84] + [86] LaTeX Font Info: Try loading font information for TS1+futs on input line 450 . (/usr/share/texlive/texmf-dist/tex/latex/fourier/ts1futs.fd File: ts1futs.fd 2004/03/26 Fontinst v1.926 font definitions for TS1/futs. ) Package microtype Info: Loading generic protrusion settings for font family (microtype) `futs' (encoding: TS1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. 
Underfull \vbox (badness 6592) has occurred while \output is active [] - [85] + [87] Underfull \hbox (badness 3343) in paragraph at lines 496--496 \T1/futs/m/n/10.95 (+20) FOSL2, JUNB, JUN::JUNB, FOSL1::JUND, FOS::JUN, [] Overfull \hbox (5.93637pt too wide) in paragraph at lines 488--519 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [86] + [88] Underfull \vbox (badness 10000) has occurred while \output is active [] - [87] + [89] Underfull \vbox (badness 10000) has occurred while \output is active [] - [88] + [90] Underfull \vbox (badness 10000) has occurred while \output is active [] - [89] + [91] Overfull \vbox (42.78156pt too high) has occurred while \output is active [] -[90]) +[92]) Underfull \vbox (badness 10000) has occurred while \output is active [] - [91] -[92] + [93] +[94] \openout2 = `tail/appendix.aux'. (./tail/appendix.tex Appendix A. - File: images/ch_smile-seq/figure_s4_reproduced.png Graphic file (type png) Package pdftex.def Info: images/ch_smile-seq/figure_s4_reproduced.png used on input line 13. (pdftex.def) Requested size: 424.06316pt x 235.07848pt. Overfull \hbox (6.3767pt too wide) in paragraph at lines 13--14 [] [] +s_EM_4class_15shift_flip.png, id=1759, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_al lpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUni Pk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input line 31. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. 
Underfull \vbox (badness 5652) has occurred while \output is active [] - [93 + [95 <./images/ch_smile-seq/figure_s4_reproduced.png>] +_allpeaks_EM_4class_15shift_flip.png, id=1765, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_GM 12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1I ggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input lin e 39. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [94 <./images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_ + [96 <./images/ch_encode_peaks/wgEncodeAwgTfbsUwGm12878CtcfUniPk_MNase_GM12878_ allpeaks_EM_4class_15shift_flip.png (PNG copy)>] +aks_EM_4class_15shift_flip.png, id=1770, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM12878_ allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosU niPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input line 47. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [95 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_ + [97 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Nrf1IggmusUniPk_MNase_ GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] +allpeaks_EM_4class_15shift_flip.png, id=1776, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_GM1 2878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIg gmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on input line 55. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. 
Underfull \vbox (badness 10000) has occurred while \output is active [] - [96 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM1287 + [98 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878CfosUniPk_MNase_GM1287 8_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] +12878_allpeaks_EM_4class_15shift_flip.png, id=1781, 602.25pt x 903.375pt> File: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_MNa se_GM12878_allpeaks_EM_4class_15shift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1 a300IggmusUniPk_MNase_GM12878_allpeaks_EM_4class_15shift_flip.png used on inpu t line 63. (pdftex.def) Requested size: 301.12425pt x 451.6864pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [97 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_G + [99 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878MaxIggmusUniPk_MNase_G M12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] - + File: images/ch_encode_peaks/ctcf_ndr.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ctcf_ndr.png used on input lin e 71. (pdftex.def) Requested size: 346.89647pt x 462.52863pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [98 <./images/ch_encode_peaks/wgEncodeAwgTfbsSydhGm12878Brca1a300IggmusUniPk_M -Nase_GM12878_allpeaks_EM_4class_15shift_flip.png (PNG copy)>] -] + File: images/ch_encode_peaks/jund_motif_association.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/jund_motif_association.png use d on input line 79. (pdftex.def) Requested size: 433.61232pt x 339.18118pt. 
Overfull \hbox (15.92586pt too wide) in paragraph at lines 79--80 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [99 <./images/ch_encode_peaks/ctcf_ndr.png (PNG copy)>] - + [101 <./images/ch_encode_peaks/ctcf_ndr.png (PNG copy)>] + File: images/ch_encode_peaks/ebf1_haib_3.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_3.png used on input line 87. (pdftex.def) Requested size: 260.16739pt x 115.62994pt. - + File: images/ch_encode_peaks/MA0154_3.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/MA0154_3.png used on input lin e 95. (pdftex.def) Requested size: 361.3491pt x 180.67456pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [100 <./images/ch_encode_peaks/jund_motif_association.png>] - + [102 <./images/ch_encode_peaks/jund_motif_association.png>] + File: images/ch_encode_peaks/ebf1_haib_2.png Graphic file (type png) Package pdftex.def Info: images/ch_encode_peaks/ebf1_haib_2.png used on input line 103. (pdftex.def) Requested size: 260.16739pt x 346.88986pt. - File: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png used on input line 113. (pdftex.def) Requested size: 390.26102pt x 173.44933pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [101 <./images/ch_encode_peaks/ebf1_haib_3.png (PNG copy)> <./images/ch_encode -_peaks/MA0154_3.png>] [102 <./images/ch_encode_peaks/ebf1_haib_2.png (PNG copy) + [103 <./images/ch_encode_peaks/ebf1_haib_3.png (PNG copy)> <./images/ch_encode +_peaks/MA0154_3.png>] [104 <./images/ch_encode_peaks/ebf1_haib_2.png (PNG copy) >] - File: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png Graphic file (type pn g) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_6class_shift_flip.png u sed on input line 121. (pdftex.def) Requested size: 390.26102pt x 173.44933pt. 
- File: images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png Graphi c file (type png) Package pdftex.def Info: images/ch_atac-seq/simulated_sequences_2class_flip_bes t_motifs.png used on input line 129. (pdftex.def) Requested size: 202.3524pt x 231.2599pt. Underfull \vbox (badness 10000) has occurred while \output is active [] - [103 <./images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png (PNG copy)> <./i + [105 <./images/ch_atac-seq/sp1_motifs_6class_noshift_flip.png (PNG copy)> <./i mages/ch_atac-seq/sp1_motifs_6class_shift_flip.png (PNG copy)>] - + File: images/ch_atac-seq/sp1_motifs_10class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_10class.png used on inp ut line 137. (pdftex.def) Requested size: 455.30783pt x 303.53854pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 137--138 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [104 <./images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png (PN + [106 <./images/ch_atac-seq/simulated_sequences_2class_flip_best_motifs.png (PN G copy)>] File: images/ch_atac-seq/sp1_motifs_10class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/sp1_motifs_10class.png used on inp ut line 145. (pdftex.def) Requested size: 455.30783pt x 303.53854pt. Overfull \hbox (37.62137pt too wide) in paragraph at lines 145--146 [] [] Underfull \vbox (badness 10000) has occurred while \output is active [] - [105 <./images/ch_atac-seq/sp1_motifs_10class.png (PNG copy)>] -] + File: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/peaks_rmsk_sampled_sequences_23clas s.png used on input line 153. (pdftex.def) Requested size: 390.26102pt x 260.17401pt. - + File: images/ch_atac-seq/data_classPU1_2class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/data_classPU1_2class.png used on i nput line 161. 
(pdftex.def) Requested size: 433.62335pt x 216.81166pt. Overfull \hbox (15.93689pt too wide) in paragraph at lines 161--162 [] [] - + File: images/ch_atac-seq/data_classjun_3class.png Graphic file (type png) Package pdftex.def Info: images/ch_atac-seq/data_classjun_3class.png used on i nput line 169. (pdftex.def) Requested size: 433.62335pt x 216.81166pt. Overfull \hbox (15.93689pt too wide) in paragraph at lines 169--170 [] [] -) [106] [107 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png (PN -G copy)>] [108 <./images/ch_atac-seq/data_classPU1_2class.png (PNG copy)>] -[109 <./images/ch_atac-seq/data_classjun_3class.png (PNG copy)>] [110 +) [108] [109 <./images/ch_atac-seq/peaks_rmsk_sampled_sequences_23class.png (PN +G copy)>] [110 <./images/ch_atac-seq/data_classPU1_2class.png (PNG copy)>] +[111 <./images/ch_atac-seq/data_classjun_3class.png (PNG copy)>] [112 ] \openout2 = `tail/biblio.aux'. -(./tail/biblio.tex (./my_thesis.bbl [111 +(./tail/biblio.tex (./my_thesis.bbl [113 -] [112] [113] [114] [115] [116] -[117] [118] [119])) [120] +] [114] [115] [116] [117] [118] +[119] [120] [121])) [122] \openout2 = `tail/cv.aux'. (./tail/cv.tex - + File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf used on input line 6. (pdftex.def) Requested size: 597.22978pt x 845.15544pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf used on input line 6. (pdftex.def) Requested size: 597.22978pt x 845.15544pt. - + File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.22978pt x 845.15544pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. 
File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page1 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. - [121 + [123 <./tail/cv.pdf>] - + File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page2 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page2 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. File: tail/cv.pdf Graphic file (type pdf) Package pdftex.def Info: tail/cv.pdf , page2 used on input line 6. (pdftex.def) Requested size: 597.25711pt x 845.19412pt. - [122 <./tail/cv.pdf>]) + [124 <./tail/cv.pdf>]) Package atveryend Info: Empty hook `BeforeClearDocument' on input line 82. Package atveryend Info: Empty hook `AfterLastShipout' on input line 82. (./my_thesis.aux (./head/dedication.aux) (./head/acknowledgements.aux) (./head/preface.aux) (./head/abstracts.aux) (./main/ch_introduction.aux) (./main/ch_group_projects.aux) (./main/ch_encode_peaks.aux) (./main/ch_smile-seq.aux) (./main/ch_atac-seq.aux) (./tail/appendix.aux) (./tail/biblio.aux) (./tail/cv.aux)) Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 82. Package atveryend Info: Empty hook `AtEndAfterFileList' on input line 82. LaTeX Warning: There were undefined references. LaTeX Warning: There were multiply-defined labels. Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 82. 
) Here is how much of TeX's memory you used: - 28066 strings out of 492982 - 498903 string characters out of 6134895 - 763281 words of memory out of 5000000 - 30126 multiletter control sequences out of 15000+600000 + 28095 strings out of 492982 + 499764 string characters out of 6134895 + 764500 words of memory out of 5000000 + 30146 multiletter control sequences out of 15000+600000 147661 words of font info for 346 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 57i,24n,79p,2621b,1323s stack positions out of 5000i,500n,10000p,200000b,80000s {/usr/share/texmf/fonts/enc/dvips/lm/lm-ec.enc}{/usr/share/texlive/texmf-dist /fonts/enc/dvips/base/8r.enc} -Output written on my_thesis.pdf (140 pages, 86749207 bytes). +Output written on my_thesis.pdf (142 pages, 87699765 bytes). PDF statistics: - 2347 PDF objects out of 2487 (max. 8388607) - 2034 compressed objects within 21 object streams - 515 named destinations out of 1000 (max. 500000) - 33885 words of extra memory for PDF output out of 35830 (max. 10000000) + 2396 PDF objects out of 2487 (max. 8388607) + 2077 compressed objects within 21 object streams + 523 named destinations out of 1000 (max. 500000) + 33890 words of extra memory for PDF output out of 35830 (max. 
10000000) diff --git a/my_thesis.pdf b/my_thesis.pdf index e8a24f2..6842061 100644 Binary files a/my_thesis.pdf and b/my_thesis.pdf differ diff --git a/my_thesis.synctex.gz b/my_thesis.synctex.gz index b9b85d8..9652520 100644 Binary files a/my_thesis.synctex.gz and b/my_thesis.synctex.gz differ diff --git a/my_thesis.toc b/my_thesis.toc index 524c247..f3b22ea 100644 --- a/my_thesis.toc +++ b/my_thesis.toc @@ -1,115 +1,115 @@ \babel@toc {english}{} \babel@toc {french}{} \babel@toc {english}{} \contentsline {chapter}{Acknowledgements}{i}{chapter*.1} \contentsline {chapter}{Preface}{iii}{chapter*.2} \contentsline {chapter}{Abstract (English/Fran\IeC {\c c}ais/Deutsch)}{v}{chapter*.3} \babel@toc {german}{} \babel@toc {english}{} \babel@toc {french}{} \babel@toc {english}{} \contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1} \contentsline {chapter}{Introduction}{1}{chapter.1} \contentsline {section}{\numberline {1.1}About chromatin}{1}{section.1.1} \contentsline {subsection}{\numberline {1.1.1}The chromatin structure}{1}{subsection.1.1.1} -\contentsline {subsection}{\numberline {1.1.2}The chromatin is a dynamic structure}{3}{subsection.1.1.2} -\contentsline {subsection}{\numberline {1.1.3}About nucleosome positioning}{4}{subsection.1.1.3} -\contentsline {subsection}{\numberline {1.1.4}Measuring nucleosome occupancy}{4}{subsection.1.1.4} -\contentsline {subsection}{\numberline {1.1.5}About chromatin domains}{4}{subsection.1.1.5} -\contentsline {subsection}{\numberline {1.1.6}Regulatory elements}{4}{subsection.1.1.6} -\contentsline {subsection}{\numberline {1.1.7}Pioneering factors, a special class of TFs}{4}{subsection.1.1.7} -\contentsline {subsection}{\numberline {1.1.8}Digital footprinting}{5}{subsection.1.1.8} -\contentsline {section}{\numberline {1.2}About transcription factors}{5}{section.1.2} -\contentsline {subsection}{\numberline {1.2.1}How chromatin affects TF binding}{5}{subsection.1.2.1} -\contentsline {subsection}{\numberline {1.2.2}Modeling 
sequence specificity}{5}{subsection.1.2.2} -\contentsline {subsection}{\numberline {1.2.3}TF co-binding}{5}{subsection.1.2.3} -\contentsline {subsection}{\numberline {1.2.4}Measuring TF binding in vivo}{5}{subsection.1.2.4} -\contentsline {subsection}{\numberline {1.2.5}Measuring TF binding in vitro}{5}{subsection.1.2.5} -\contentsline {section}{\numberline {1.3}Data analysis}{5}{section.1.3} -\contentsline {chapter}{\numberline {2}Published laboratory projects}{7}{chapter.2} -\contentsline {chapter}{Published laboratory projects}{7}{chapter.2} -\contentsline {section}{\numberline {2.1}Mass Genome Annotation repository}{7}{section.2.1} -\contentsline {subsection}{\numberline {2.1.1}Introduction}{7}{subsection.2.1.1} -\contentsline {subsection}{\numberline {2.1.2}MGA content and organization}{7}{subsection.2.1.2} -\contentsline {subsection}{\numberline {2.1.3}Conclusions}{9}{subsection.2.1.3} -\contentsline {section}{\numberline {2.2}Eukaryotic Promoter Database}{10}{section.2.2} -\contentsline {subsection}{\numberline {2.2.1}Introduction}{11}{subsection.2.2.1} -\contentsline {subsection}{\numberline {2.2.2}EPDnew now annotates (some of) your mushrooms and vegetables}{11}{subsection.2.2.2} -\contentsline {subsection}{\numberline {2.2.3}Increased mapping precision in human}{11}{subsection.2.2.3} -\contentsline {subsection}{\numberline {2.2.4}Integration of EPDnew with other resources}{13}{subsection.2.2.4} -\contentsline {subsection}{\numberline {2.2.5}Conclusions}{14}{subsection.2.2.5} -\contentsline {subsection}{\numberline {2.2.6}Methods}{14}{subsection.2.2.6} -\contentsline {subsubsection}{Motif occurrence profiles}{14}{subsection.2.2.6} -\contentsline {section}{\numberline {2.3}PWMScan}{15}{section.2.3} -\contentsline {subsection}{\numberline {2.3.1}Introduction}{15}{subsection.2.3.1} -\contentsline {subsection}{\numberline {2.3.2}Data and methods}{17}{subsection.2.3.2} -\contentsline {subsection}{\numberline {2.3.3}Benchmark}{18}{subsection.2.3.3} 
-\contentsline {subsection}{\numberline {2.3.4}Conclusions}{20}{subsection.2.3.4} -\contentsline {section}{\numberline {2.4}SPar-K}{21}{section.2.4} -\contentsline {subsection}{\numberline {2.4.1}Introduction}{21}{subsection.2.4.1} -\contentsline {subsection}{\numberline {2.4.2}Methods}{21}{subsection.2.4.2} -\contentsline {subsection}{\numberline {2.4.3}Results}{25}{subsection.2.4.3} -\contentsline {subsection}{\numberline {2.4.4}Conclusion}{25}{subsection.2.4.4} -\contentsline {chapter}{\numberline {3}ENCODE peaks analysis}{27}{chapter.3} -\contentsline {chapter}{ENCODE peaks analysis}{27}{chapter.3} -\contentsline {section}{\numberline {3.1}Data}{27}{section.3.1} -\contentsline {section}{\numberline {3.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{29}{section.3.2} -\contentsline {subsection}{\numberline {3.2.1}Data realignment}{30}{subsection.3.2.1} -\contentsline {section}{\numberline {3.3}Nucleosome organization around transcription factor binding sites}{31}{section.3.3} -\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{33}{section.3.4} -\contentsline {section}{\numberline {3.5}CTCF and JunD interactomes}{37}{section.3.5} -\contentsline {section}{\numberline {3.6}EBF1 binds nucleosomes}{42}{section.3.6} -\contentsline {section}{\numberline {3.7}Methods}{44}{section.3.7} -\contentsline {subsection}{\numberline {3.7.1}Data and data processing}{44}{subsection.3.7.1} -\contentsline {subsection}{\numberline {3.7.2}Classification of MNase patterns}{45}{subsection.3.7.2} -\contentsline {subsection}{\numberline {3.7.3}Quantifying nucleosome array intensity from classification results}{46}{subsection.3.7.3} -\contentsline {subsection}{\numberline {3.7.4}Peak colocalization}{47}{subsection.3.7.4} -\contentsline {subsection}{\numberline {3.7.5}NDR detection}{47}{subsection.3.7.5} -\contentsline {subsection}{\numberline {3.7.6}CTCF and JunD interactors}{50}{subsection.3.7.6} -\contentsline 
{subsection}{\numberline {3.7.7}EBF1 and nucleosome}{51}{subsection.3.7.7} -\contentsline {chapter}{\numberline {4}SMiLE-seq data analysis}{53}{chapter.4} -\contentsline {chapter}{SMiLE-seq data analysis}{53}{chapter.4} -\contentsline {subsection}{\numberline {4.0.1}Introduction}{53}{subsection.4.0.1} -\contentsline {subsection}{\numberline {4.0.2}Hidden Markov Model Motif discovery}{55}{subsection.4.0.2} -\contentsline {subsection}{\numberline {4.0.3}Binding motif evaluation}{56}{subsection.4.0.3} -\contentsline {subsection}{\numberline {4.0.4}Results}{58}{subsection.4.0.4} -\contentsline {subsection}{\numberline {4.0.5}Conclusions}{60}{subsection.4.0.5} -\contentsline {chapter}{\numberline {5}Chromatin accessibility of monocytes}{61}{chapter.5} -\contentsline {section}{\numberline {5.1}ATAC-seq}{61}{section.5.1} -\contentsline {section}{\numberline {5.2}Monitoring TF binding}{63}{section.5.2} -\contentsline {section}{\numberline {5.3}The advent of single cell DGF}{64}{section.5.3} -\contentsline {section}{\numberline {5.4}Open issues}{64}{section.5.4} -\contentsline {section}{\numberline {5.5}Data}{64}{section.5.5} -\contentsline {section}{\numberline {5.6}Identifying over-represented signals}{65}{section.5.6} -\contentsline {subsection}{\numberline {5.6.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{65}{subsection.5.6.1} -\contentsline {subsection}{\numberline {5.6.2}EMSequence : an algorithm to identify over-represented sequences}{65}{subsection.5.6.2} -\contentsline {subsubsection}{without shift and flip}{67}{figure.caption.32} -\contentsline {subsubsection}{with shift and flip}{67}{equation.5.6.2} -\contentsline {subsection}{\numberline {5.6.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{69}{subsection.5.6.3} -\contentsline {subsection}{\numberline {5.6.4}Data realignment}{70}{subsection.5.6.4} -\contentsline {section}{\numberline {5.7}Results}{70}{section.5.7} -\contentsline 
{subsection}{\numberline {5.7.1}Fragment size analysis}{70}{subsection.5.7.1} -\contentsline {subsection}{\numberline {5.7.2}Measuring open chromatin and nucleosome occupancy}{73}{subsection.5.7.2} -\contentsline {subsection}{\numberline {5.7.3}Evaluation of EMSequence and ChIPPartitioning}{75}{subsection.5.7.3} -\contentsline {subsubsection}{EMSequence}{75}{subsection.5.7.3} -\contentsline {subsubsection}{ChIPPartitioning}{78}{figure.caption.37} -\contentsline {section}{\numberline {5.8}Aligning the binding sites}{80}{section.5.8} -\contentsline {section}{\numberline {5.9}Exploring individual TF classes}{83}{section.5.9} -\contentsline {section}{\numberline {5.10}Discussions}{84}{section.5.10} -\contentsline {section}{\numberline {5.11}Perspectives}{84}{section.5.11} -\contentsline {section}{\numberline {5.12}Methods}{85}{section.5.12} -\contentsline {subsection}{\numberline {5.12.1}Partitioning programs}{85}{subsection.5.12.1} -\contentsline {subsection}{\numberline {5.12.2}Fragment classes}{85}{subsection.5.12.2} -\contentsline {subsection}{\numberline {5.12.3}Simulated sequences}{86}{subsection.5.12.3} -\contentsline {subsection}{\numberline {5.12.4}Realignment using JASPAR motifs}{86}{subsection.5.12.4} -\contentsline {subsection}{\numberline {5.12.5}Model extension}{88}{subsection.5.12.5} -\contentsline {subsection}{\numberline {5.12.6}Extracting data assigned to a class}{88}{subsection.5.12.6} -\contentsline {subsection}{\numberline {5.12.7}Peak processing}{91}{subsection.5.12.7} -\contentsline {subsection}{\numberline {5.12.8}Per TF classes}{91}{subsection.5.12.8} -\contentsline {subsection}{\numberline {5.12.9}Per TF sub-classes}{91}{subsection.5.12.9} -\contentsline {chapter}{\numberline {A}An appendix}{93}{appendix.A} -\contentsline {section}{\numberline {A.1}Supplementary figures}{93}{section.A.1} +\contentsline {subsection}{\numberline {1.1.2}The chromatin is dynamic}{3}{subsection.1.1.2} +\contentsline {subsection}{\numberline {1.1.3}Measuring 
nucleosome occupancy}{4}{subsection.1.1.3} +\contentsline {subsection}{\numberline {1.1.4}About nucleosome positioning}{4}{subsection.1.1.4} +\contentsline {subsection}{\numberline {1.1.5}About chromatin domains}{6}{subsection.1.1.5} +\contentsline {subsection}{\numberline {1.1.6}Regulatory elements}{7}{subsection.1.1.6} +\contentsline {subsection}{\numberline {1.1.7}Pioneering factors, a special class of TFs}{7}{subsection.1.1.7} +\contentsline {subsection}{\numberline {1.1.8}Digital footprinting}{7}{subsection.1.1.8} +\contentsline {section}{\numberline {1.2}About transcription factors}{7}{section.1.2} +\contentsline {subsection}{\numberline {1.2.1}How chromatin affects TF binding}{7}{subsection.1.2.1} +\contentsline {subsection}{\numberline {1.2.2}Modeling sequence specificity}{7}{subsection.1.2.2} +\contentsline {subsection}{\numberline {1.2.3}TF co-binding}{7}{subsection.1.2.3} +\contentsline {subsection}{\numberline {1.2.4}Measuring TF binding in vivo}{7}{subsection.1.2.4} +\contentsline {subsection}{\numberline {1.2.5}Measuring TF binding in vitro}{8}{subsection.1.2.5} +\contentsline {section}{\numberline {1.3}Data analysis}{8}{section.1.3} +\contentsline {chapter}{\numberline {2}Published laboratory projects}{9}{chapter.2} +\contentsline {chapter}{Published laboratory projects}{9}{chapter.2} +\contentsline {section}{\numberline {2.1}Mass Genome Annotation repository}{9}{section.2.1} +\contentsline {subsection}{\numberline {2.1.1}Introduction}{9}{subsection.2.1.1} +\contentsline {subsection}{\numberline {2.1.2}MGA content and organization}{9}{subsection.2.1.2} +\contentsline {subsection}{\numberline {2.1.3}Conclusions}{11}{subsection.2.1.3} +\contentsline {section}{\numberline {2.2}Eukaryotic Promoter Database}{12}{section.2.2} +\contentsline {subsection}{\numberline {2.2.1}Introduction}{13}{subsection.2.2.1} +\contentsline {subsection}{\numberline {2.2.2}EPDnew now annotates (some of) your mushrooms and vegetables}{13}{subsection.2.2.2} +\contentsline 
{subsection}{\numberline {2.2.3}Increased mapping precision in human}{13}{subsection.2.2.3} +\contentsline {subsection}{\numberline {2.2.4}Integration of EPDnew with other resources}{15}{subsection.2.2.4} +\contentsline {subsection}{\numberline {2.2.5}Conclusions}{16}{subsection.2.2.5} +\contentsline {subsection}{\numberline {2.2.6}Methods}{16}{subsection.2.2.6} +\contentsline {subsubsection}{Motif occurrence profiles}{16}{subsection.2.2.6} +\contentsline {section}{\numberline {2.3}PWMScan}{17}{section.2.3} +\contentsline {subsection}{\numberline {2.3.1}Introduction}{17}{subsection.2.3.1} +\contentsline {subsection}{\numberline {2.3.2}Data and methods}{19}{subsection.2.3.2} +\contentsline {subsection}{\numberline {2.3.3}Benchmark}{20}{subsection.2.3.3} +\contentsline {subsection}{\numberline {2.3.4}Conclusions}{22}{subsection.2.3.4} +\contentsline {section}{\numberline {2.4}SPar-K}{23}{section.2.4} +\contentsline {subsection}{\numberline {2.4.1}Introduction}{23}{subsection.2.4.1} +\contentsline {subsection}{\numberline {2.4.2}Methods}{23}{subsection.2.4.2} +\contentsline {subsection}{\numberline {2.4.3}Results}{27}{subsection.2.4.3} +\contentsline {subsection}{\numberline {2.4.4}Conclusion}{27}{subsection.2.4.4} +\contentsline {chapter}{\numberline {3}ENCODE peaks analysis}{29}{chapter.3} +\contentsline {chapter}{ENCODE peaks analysis}{29}{chapter.3} +\contentsline {section}{\numberline {3.1}Data}{29}{section.3.1} +\contentsline {section}{\numberline {3.2}ChIPPartitioning : an algorithm to identify chromatin architectures}{31}{section.3.2} +\contentsline {subsection}{\numberline {3.2.1}Data realignment}{32}{subsection.3.2.1} +\contentsline {section}{\numberline {3.3}Nucleosome organization around transcription factor binding sites}{33}{section.3.3} +\contentsline {section}{\numberline {3.4}The case of CTCF, RAD21, SMC3, YY1 and ZNF143}{35}{section.3.4} +\contentsline {section}{\numberline {3.5}CTCF and JunD interactomes}{39}{section.3.5} +\contentsline 
{section}{\numberline {3.6}EBF1 binds nucleosomes}{44}{section.3.6} +\contentsline {section}{\numberline {3.7}Methods}{46}{section.3.7} +\contentsline {subsection}{\numberline {3.7.1}Data and data processing}{46}{subsection.3.7.1} +\contentsline {subsection}{\numberline {3.7.2}Classification of MNase patterns}{47}{subsection.3.7.2} +\contentsline {subsection}{\numberline {3.7.3}Quantifying nucleosome array intensity from classification results}{48}{subsection.3.7.3} +\contentsline {subsection}{\numberline {3.7.4}Peak colocalization}{49}{subsection.3.7.4} +\contentsline {subsection}{\numberline {3.7.5}NDR detection}{49}{subsection.3.7.5} +\contentsline {subsection}{\numberline {3.7.6}CTCF and JunD interactors}{52}{subsection.3.7.6} +\contentsline {subsection}{\numberline {3.7.7}EBF1 and nucleosome}{53}{subsection.3.7.7} +\contentsline {chapter}{\numberline {4}SMiLE-seq data analysis}{55}{chapter.4} +\contentsline {chapter}{SMiLE-seq data analysis}{55}{chapter.4} +\contentsline {subsection}{\numberline {4.0.1}Introduction}{55}{subsection.4.0.1} +\contentsline {subsection}{\numberline {4.0.2}Hidden Markov Model Motif discovery}{57}{subsection.4.0.2} +\contentsline {subsection}{\numberline {4.0.3}Binding motif evaluation}{58}{subsection.4.0.3} +\contentsline {subsection}{\numberline {4.0.4}Results}{60}{subsection.4.0.4} +\contentsline {subsection}{\numberline {4.0.5}Conclusions}{62}{subsection.4.0.5} +\contentsline {chapter}{\numberline {5}Chromatin accessibility of monocytes}{63}{chapter.5} +\contentsline {section}{\numberline {5.1}ATAC-seq}{63}{section.5.1} +\contentsline {section}{\numberline {5.2}Monitoring TF binding}{65}{section.5.2} +\contentsline {section}{\numberline {5.3}The advent of single cell DGF}{66}{section.5.3} +\contentsline {section}{\numberline {5.4}Open issues}{66}{section.5.4} +\contentsline {section}{\numberline {5.5}Data}{66}{section.5.5} +\contentsline {section}{\numberline {5.6}Identifying over-represented signals}{67}{section.5.6} 
+\contentsline {subsection}{\numberline {5.6.1}ChIPPartitioning : an algorithm to identify over-represented read patterns}{67}{subsection.5.6.1} +\contentsline {subsection}{\numberline {5.6.2}EMSequence : an algorithm to identify over-represented sequences}{67}{subsection.5.6.2} +\contentsline {subsubsection}{without shift and flip}{69}{figure.caption.33} +\contentsline {subsubsection}{with shift and flip}{69}{equation.5.6.2} +\contentsline {subsection}{\numberline {5.6.3}EMJoint : an algorithm to identify over-represented sequences and chromatin architectures}{71}{subsection.5.6.3} +\contentsline {subsection}{\numberline {5.6.4}Data realignment}{72}{subsection.5.6.4} +\contentsline {section}{\numberline {5.7}Results}{72}{section.5.7} +\contentsline {subsection}{\numberline {5.7.1}Fragment size analysis}{72}{subsection.5.7.1} +\contentsline {subsection}{\numberline {5.7.2}Measuring open chromatin and nucleosome occupancy}{75}{subsection.5.7.2} +\contentsline {subsection}{\numberline {5.7.3}Evaluation of EMSequence and ChIPPartitioning}{77}{subsection.5.7.3} +\contentsline {subsubsection}{EMSequence}{77}{subsection.5.7.3} +\contentsline {subsubsection}{ChIPPartitioning}{80}{figure.caption.38} +\contentsline {section}{\numberline {5.8}Aligning the binding sites}{82}{section.5.8} +\contentsline {section}{\numberline {5.9}Exploring individual TF classes}{85}{section.5.9} +\contentsline {section}{\numberline {5.10}Discussions}{86}{section.5.10} +\contentsline {section}{\numberline {5.11}Perspectives}{86}{section.5.11} +\contentsline {section}{\numberline {5.12}Methods}{87}{section.5.12} +\contentsline {subsection}{\numberline {5.12.1}Partitioning programs}{87}{subsection.5.12.1} +\contentsline {subsection}{\numberline {5.12.2}Fragment classes}{87}{subsection.5.12.2} +\contentsline {subsection}{\numberline {5.12.3}Simulated sequences}{88}{subsection.5.12.3} +\contentsline {subsection}{\numberline {5.12.4}Realignment using JASPAR motifs}{88}{subsection.5.12.4} 
+\contentsline {subsection}{\numberline {5.12.5}Model extension}{90}{subsection.5.12.5} +\contentsline {subsection}{\numberline {5.12.6}Extracting data assigned to a class}{90}{subsection.5.12.6} +\contentsline {subsection}{\numberline {5.12.7}Peak processing}{93}{subsection.5.12.7} +\contentsline {subsection}{\numberline {5.12.8}Per TF classes}{93}{subsection.5.12.8} +\contentsline {subsection}{\numberline {5.12.9}Per TF sub-classes}{93}{subsection.5.12.9} +\contentsline {chapter}{\numberline {A}An appendix}{95}{appendix.A} +\contentsline {section}{\numberline {A.1}Supplementary figures}{95}{section.A.1} \vspace {\normalbaselineskip } -\contentsline {chapter}{Bibliography}{111}{section*.62} -\contentsline {chapter}{Bibliography}{120}{appendix*.63} -\contentsline {chapter}{Curriculum Vitae}{121}{section*.64} +\contentsline {chapter}{Bibliography}{113}{section*.63} +\contentsline {chapter}{Bibliography}{122}{appendix*.64} +\contentsline {chapter}{Curriculum Vitae}{123}{section*.65} diff --git a/tail/appendix.aux b/tail/appendix.aux index e3424d5..05fb013 100644 --- a/tail/appendix.aux +++ b/tail/appendix.aux @@ -1,97 +1,97 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{jolma_dna-binding_2013} \citation{jolma_dna-binding_2013} -\@writefile{toc}{\contentsline {chapter}{\numberline {A}An appendix}{93}{appendix.A}} +\@writefile{toc}{\contentsline {chapter}{\numberline {A}An appendix}{95}{appendix.A}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{loa}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {A.1}Supplementary figures}{93}{section.A.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.1}{\ignorespaces \textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. 
AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }}{93}{figure.caption.43}} -\newlabel{suppl_smileseq_auc_2}{{A.1}{93}{\textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }{figure.caption.43}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.2}{\ignorespaces \textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. 
DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{94}{figure.caption.44}} -\newlabel{suppl_encode_peaks_em_ctcf}{{A.2}{94}{\textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.44}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.3}{\ignorespaces \textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. 
The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{95}{figure.caption.45}} -\newlabel{suppl_encode_peaks_em_nrf1}{{A.3}{95}{\textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.45}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.4}{\ignorespaces \textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. 
Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{96}{figure.caption.46}} -\newlabel{suppl_encode_peaks_em_cfos}{{A.4}{96}{\textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.46}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.5}{\ignorespaces \textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. 
The y-axis indicates the min/max signal for all densities.\relax }}{97}{figure.caption.47}} -\newlabel{suppl_encode_peaks_em_max}{{A.5}{97}{\textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.47}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.6}{\ignorespaces \textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. 
The y-axis indicates the min/max signal for all densities.\relax }}{98}{figure.caption.48}} -\newlabel{suppl_encode_peaks_em_brca1}{{A.6}{98}{\textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.48}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.7}{\ignorespaces \textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }}{99}{figure.caption.49}} -\newlabel{suppl_encode_peaks_ctcf_ndr}{{A.7}{99}{\textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }{figure.caption.49}{}} +\@writefile{toc}{\contentsline {section}{\numberline {A.1}Supplementary figures}{95}{section.A.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.1}{\ignorespaces \textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. 
AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }}{95}{figure.caption.44}} +\newlabel{suppl_smileseq_auc_2}{{A.1}{95}{\textbf {Predictive power of SMiLE-seq :} \textbf {A} binding models were derived de novo from HT-SELEX 1st cycle data using the HMM discovery method (labelled HT-SELEX cycle 1 HMM) and their performances were assessed using the AUC-ROC. AUC-ROC values for the corresponding TF models derived from SMiLe-seq data (labelled SMiLE-seq) and reported by Jolma and colleagues (labelled HT-SELEX reported matrices, \cite {jolma_dna-binding_2013}) are also displayed. \textbf {B} the predictive performances of CEBPb, CTCF and TCF7 binding models were assessed using subsets of binding sites of decreasing affinities. Inside each peak list, the peaks were ranked by score and subsets of 500 peaks were selected. Peaks 1-500 have the highest affinity, then peaks 501-1000, and so on. The boxplots indicate the distribution of AUC-ROC obtained over all available peak-lists.\relax }{figure.caption.44}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.2}{\ignorespaces \textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. 
DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{96}{figure.caption.45}} +\newlabel{suppl_encode_peaks_em_ctcf}{{A.2}{96}{\textbf {Chromatine architectures around CTCF binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.45}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.3}{\ignorespaces \textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. 
The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{97}{figure.caption.46}} +\newlabel{suppl_encode_peaks_em_nrf1}{{A.3}{97}{\textbf {Chromatine architectures around NRF1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.46}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.4}{\ignorespaces \textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. 
Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }}{98}{figure.caption.47}} +\newlabel{suppl_encode_peaks_em_cfos}{{A.4}{98}{\textbf {Chromatine architectures around cFOS binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.47}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.5}{\ignorespaces \textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. 
The y-axis indicates the min/max signal for all densities.\relax }}{99}{figure.caption.48}} +\newlabel{suppl_encode_peaks_em_max}{{A.5}{99}{\textbf {Chromatine architectures around max binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.48}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.6}{\ignorespaces \textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. 
The y-axis indicates the min/max signal for all densities.\relax }}{100}{figure.caption.49}} +\newlabel{suppl_encode_peaks_em_brca1}{{A.6}{100}{\textbf {Chromatine architectures around BRCA1 binding sites} discovered using ChIPPartitioning. The partition was done with respect to the MNase reads (red), +/- 1kb around the peaks, in bins of 10bp, that were allowed to be shifted and flipped. DNaseI (blue), TSS density (violet) and sequence conservation (green) were realigned according to MNase classification and overlaid. The y-axis scale represent the proportion of the highest signal for each chromatin pattern. The first row contains the aggregated signal over all sites. The number of binding sites (peaks) is indicated in parenthesis. The following rows contains the 4 classes discovered. Their overall probability is indicated atop of the class signal, on the right. The y-axis indicates the min/max signal for all densities.\relax }{figure.caption.49}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.7}{\ignorespaces \textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }}{101}{figure.caption.50}} +\newlabel{suppl_encode_peaks_ctcf_ndr}{{A.7}{101}{\textbf {Nucleosome occupancy around CTCF peaks } measured by MNase-seq, in bins of 10bp. The nucleosome depleted region is displayed in blue.\relax }{figure.caption.50}{}} \citation{khan_jaspar_2018} \citation{khan_jaspar_2018} -\@writefile{lof}{\contentsline {figure}{\numberline {A.8}{\ignorespaces \textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. 
\textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }}{100}{figure.caption.50}} -\newlabel{suppl_encode_peaks_jund_association}{{A.8}{100}{\textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). 
The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }{figure.caption.50}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.9}{\ignorespaces \textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{101}{figure.caption.51}} -\newlabel{suppl_encode_peaks_ebf1_nucl}{{A.9}{101}{\textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.51}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.10}{\ignorespaces \textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }}{101}{figure.caption.52}} -\newlabel{suppl_encode_peaks_ebf1_logo}{{A.10}{101}{\textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }{figure.caption.52}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.11}{\ignorespaces \textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }}{102}{figure.caption.53}} -\newlabel{suppl_encode_peaks_ebf1_chrom}{{A.11}{102}{\textbf {EBF1 binding sites} chromatin features. 
\textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }{figure.caption.53}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.8}{\ignorespaces \textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }}{102}{figure.caption.51}} +\newlabel{suppl_encode_peaks_jund_association}{{A.8}{102}{\textbf {JunD motif association} measured around the binding sites of different TFs. For a each TF, its binding sites, +/- 500bp, were searched for the presence of i) the TF motif and ii) CTCF motif. 
For each TF, a 2x2 contingency table was created with the number of peaks having i) both motifs, ii) the TF motif only, iii) CTCF motif only and iv) no motif. \textbf {A} Odd ratio (OR) of the exact Fisher test performed on each TF contingency table. The ORs are displayed with their 95\% confidence interval (CI). ORs > 1 - that is, with 1 not part of the 95\%CI - are labeled in green and indicate an association of both motifs more frequent than expected by chance. ORs < 1 are labeled in red and indicate a repulsion of both motifs more frequence than expected by chance. The JunD and cFos dataset ORs are too high to be represented in this plot. \textbf {B} Density of JunD motif occurrence at the absolute distance of different TF binding sites (peak centers) which also have their own motif present (at distance 0). The rows were standardized and aggregated using the Euclidean distance. \textbf {C} Same as in (B) but for TF binding sites that does not have their own motif.\relax }{figure.caption.51}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.9}{\ignorespaces \textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }}{103}{figure.caption.52}} +\newlabel{suppl_encode_peaks_ebf1_nucl}{{A.9}{103}{\textbf {EBF1 binding sites} around the dyad of nucleosomes having an occupied EBF1 motif within 100bp (in red) and of all nucleosomes (in blue). 
The abrupt decrease of EBF1 motif frequency at +/- 100bp reflects the nucleosome selection process.\relax }{figure.caption.52}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.10}{\ignorespaces \textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }}{103}{figure.caption.53}} +\newlabel{suppl_encode_peaks_ebf1_logo}{{A.10}{103}{\textbf {EBF1 logo} from JASPAR binding model MA0154.3 \citep {khan_jaspar_2018}.\relax }{figure.caption.53}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.11}{\ignorespaces \textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }}{104}{figure.caption.54}} +\newlabel{suppl_encode_peaks_ebf1_chrom}{{A.11}{104}{\textbf {EBF1 binding sites} chromatin features. \textbf {A} Chromatin accessibility around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {B} H3K4me2 deposition around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue). \textbf {C} Sequence conservation around nucleosomes that have an EBF1 binding site within 100bp (red) and all nucleosomes (blue).\relax }{figure.caption.54}{}} \citation{ou_motifstack_2018} \citation{ou_motifstack_2018} -\@writefile{lof}{\contentsline {figure}{\numberline {A.12}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. 
The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{103}{figure.caption.54}} -\newlabel{suppl_emread_sp1_noshift_flip}{{A.12}{103}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.54}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.13}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{103}{figure.caption.55}} -\newlabel{suppl_emread_sp1_shift_flip}{{A.13}{103}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. 
The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.55}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.14}{\ignorespaces \textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }}{104}{figure.caption.56}} -\newlabel{suppl_atac_seq_emseq_best_motifs}{{A.14}{104}{\textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }{figure.caption.56}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.15}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. 
Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{105}{figure.caption.57}} -\newlabel{suppl_emseq_sp1_10class}{{A.15}{105}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.57}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.16}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{106}{figure.caption.58}} -\newlabel{suppl_emseq_sp1_10class}{{A.16}{106}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.58}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.17}{\ignorespaces \textbf {Extended sequence and chromatin models} found in 10'000 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. 
The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{107}{figure.caption.59}} -\newlabel{suppl_atac_seq_23class}{{A.17}{107}{\textbf {Extended sequence and chromatin models} found in 10'000 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.59}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.18}{\ignorespaces \textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{108}{figure.caption.60}} -\newlabel{suppl_atac_seq_pu1_subclass}{{A.18}{108}{\textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. 
A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.60}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {A.19}{\ignorespaces \textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{109}{figure.caption.61}} -\newlabel{suppl_atac_seq_ap1_subclass}{{A.19}{109}{\textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.61}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.12}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. 
For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{105}{figure.caption.55}} +\newlabel{suppl_emread_sp1_noshift_flip}{{A.12}{105}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run without shifing (+/- 10bp) but with flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.55}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.13}{\ignorespaces \textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }}{105}{figure.caption.56}} +\newlabel{suppl_emread_sp1_shift_flip}{{A.13}{105}{\textbf {Open chromatin classes around SP1 motifs :} EMRead was run with shifing (+/- 10bp) flipping to identify different classes of footprints around 15'883 SP1 motifs. The aggregation signal around the 6 different classes found are shown by decreasing class probability. The open chromatin patterns are displayed in red, the nucleosomes are displayed in blue. The aggregated DNA sequence is displayed as a logo. The y-axis ranges from the minimum to the maximum signal observed. 
For the DNA logo, this corresponds to 0 and 2 bits respectively.\relax }{figure.caption.56}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.14}{\ignorespaces \textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }}{106}{figure.caption.57}} +\newlabel{suppl_atac_seq_emseq_best_motifs}{{A.14}{106}{\textbf {Simulated data motifs :} motifs used for the data generation (labeled "True motif") and the best scoring - based on the AUC - partition motifs (labeled "Found motif"). The partition with EMSequence was run such that it was searching for motifs of 11bp, slightly longer than those used for the data generation. "RC" stands for reverse complement. The motifs tree and alignment was build using the motifStack R package \citep {ou_motifstack_2018}.\relax }{figure.caption.57}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.15}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{107}{figure.caption.58}} +\newlabel{suppl_emseq_sp1_10class}{{A.15}{107}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. 
These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.58}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.16}{\ignorespaces \textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }}{108}{figure.caption.59}} +\newlabel{suppl_emseq_sp1_10class}{{A.16}{108}{\textbf {SP1 motifs :} partition of 15'883 801bp sequences centered on a SP1 binding site using EMSequence. These sequences were classified by EMSequence to search for 10 different 30bp long motifs ($801 - 30 = 771$ of shifting freedom). The optimization was run for 20 iterations. The different classes are ordered by decreasing overall probability. Arrows atop of the motifs indicates head-to-tail arrangements of SP1 motifs.\relax }{figure.caption.59}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.17}{\ignorespaces \textbf {Extended sequence and chromatin models} found in 10'000 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. 
A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{109}{figure.caption.60}} +\newlabel{suppl_atac_seq_23class}{{A.17}{109}{\textbf {Extended sequence and chromatin models} found in 10'000 monocytes regulatory regions. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.60}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.18}{\ignorespaces \textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{110}{figure.caption.61}} +\newlabel{suppl_atac_seq_pu1_subclass}{{A.18}{110}{\textbf {PU.1 sub-classes} obtained by extracting PU.1 class data and subjecting them to a ChIPPartitioning classification into 2 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.61}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {A.19}{\ignorespaces \textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. 
The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }}{111}{figure.caption.62}} +\newlabel{suppl_atac_seq_ap1_subclass}{{A.19}{111}{\textbf {AP1 sub-classes} obtained by extracting AP1 class data and subjecting them to a ChIPPartitioning classification into 3 classes. The displayed logos correspond to each class sequence aggregation. The corresponding chromatin accessibility (red) and nucleosome occupancy (blue) are displayed atop of the logos. The classes are displayed by overall decreasing probability. A zoom over the central part of each class aggregation is shown in the top right inlet.\relax }{figure.caption.62}{}} \@setckpt{tail/appendix}{ -\setcounter{page}{110} +\setcounter{page}{112} \setcounter{equation}{0} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{1} \setcounter{section}{1} \setcounter{subsection}{0} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{19} \setcounter{table}{0} \setcounter{NAT@ctr}{0} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{39} \setcounter{algocfline}{3} \setcounter{algocfproc}{3} 
\setcounter{algocf}{3} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/tail/biblio.aux b/tail/biblio.aux index 91d5377..f98e3d7 100644 --- a/tail/biblio.aux +++ b/tail/biblio.aux @@ -1,155 +1,160 @@ \relax \providecommand\hyper@newdestlabel[2]{} \bibstyle{apalike} \bibdata{tail/bibliography} \bibcite{adey_rapid_2010}{{1}{2010}{{Adey et~al.}}{{}}} \bibcite{aerts_toucan:_2003}{{2}{2003}{{Aerts et~al.}}{{}}} \bibcite{aibar_scenic:_2017}{{3}{2017}{{Aibar et~al.}}{{}}} \bibcite{ambrosini_chip-seq_2016}{{4}{2016a}{{Ambrosini et~al.}}{{}}} \bibcite{ambrosini_chip-seq_2016-1}{{5}{2016b}{{Ambrosini et~al.}}{{}}} \bibcite{ambrosini_pwmscan:_2018}{{6}{2018}{{Ambrosini et~al.}}{{}}} \bibcite{ambrosini_signal_2003}{{7}{2003}{{Ambrosini et~al.}}{{}}} \bibcite{angerer_single_2017}{{8}{2017}{{Angerer et~al.}}{{}}} \bibcite{bailey_znf143_2015}{{9}{2015}{{Bailey et~al.}}{{}}} \bibcite{bailey_meme_2009}{{10}{2009}{{Bailey et~al.}}{{}}} \bibcite{barrett_ncbi_2011}{{11}{2011}{{Barrett et~al.}}{{}}} -\@writefile{toc}{\contentsline {chapter}{Bibliography}{111}{section*.62}} +\@writefile{toc}{\contentsline {chapter}{Bibliography}{113}{section*.63}} \bibcite{barski_high-resolution_2007}{{12}{2007}{{Barski et~al.}}{{}}} \bibcite{beckstette_fast_2006}{{13}{2006}{{Beckstette et~al.}}{{}}} \bibcite{berest_quantification_2018}{{14}{2018}{{Berest et~al.}}{{}}} \bibcite{berger_universal_2009}{{15}{2009}{{Berger and Bulyk}}{{}}} \bibcite{boller_defining_2018}{{16}{2018}{{Boller et~al.}}{{}}} \bibcite{boller_pioneering_2016}{{17}{2016}{{Boller et~al.}}{{}}} \bibcite{boyle_high-resolution_2008}{{18}{2008}{{Boyle et~al.}}{{}}} \bibcite{bucher_compilation_1986}{{19}{1986}{{Bucher and Trifonov}}{{}}} \bibcite{buenrostro_transposition_2013}{{20}{2013}{{Buenrostro et~al.}}{{}}} \bibcite{castro-mondragon_rsat_2017}{{21}{2017}{{Castro-Mondragon et~al.}}{{}}} \bibcite{chatr-aryamontri_biogrid_2017}{{22}{2017}{{Chatr-aryamontri 
et~al.}}{{}}} \bibcite{cheng_understanding_2012}{{23}{2012}{{Cheng et~al.}}{{}}} \bibcite{cirillo_opening_2002}{{24}{2002}{{Cirillo et~al.}}{{}}} \bibcite{consortium_integrated_2012}{{25}{2012}{{Consortium}}{{}}} \bibcite{dalton_clustering_2009}{{26}{2009}{{Dalton et~al.}}{{}}} \bibcite{donohoe_identification_2007}{{27}{2007}{{Donohoe et~al.}}{{}}} -\bibcite{dreos_epd_2013}{{28}{2013}{{Dreos et~al.}}{{}}} -\bibcite{dreos_eukaryotic_2017}{{29}{2017}{{Dreos et~al.}}{{}}} -\bibcite{dreos_mga_2018}{{30}{2018}{{Dreos et~al.}}{{}}} -\bibcite{dreos_eukaryotic_2015}{{31}{2015}{{Dreos et~al.}}{{}}} -\bibcite{fan_characterizing_2016}{{32}{2016}{{Fan et~al.}}{{}}} -\bibcite{fu_motifviz:_2004}{{33}{2004}{{Fu et~al.}}{{}}} -\bibcite{fu_insulator_2008}{{34}{2008}{{Fu et~al.}}{{}}} -\bibcite{gaffney_controls_2012}{{35}{2012}{{Gaffney et~al.}}{{}}} -\bibcite{gerstein_architecture_2012}{{36}{2012}{{Gerstein et~al.}}{{}}} -\bibcite{ghirlando_ctcf:_2016}{{37}{2016}{{Ghirlando and Felsenfeld}}{{}}} -\bibcite{gonzalez-blas_cistopic:_2019}{{38}{2019}{{Gonz\IeC {\'a}lez-Blas et~al.}}{{}}} -\bibcite{grant_fimo:_2011}{{39}{2011}{{Grant et~al.}}{{}}} -\bibcite{grossman_positional_2018}{{40}{2018}{{Grossman et~al.}}{{}}} -\bibcite{groux_spar-k:_2019}{{41}{2019}{{Groux and Bucher}}{{}}} -\bibcite{guo_high_2012}{{42}{2012}{{Guo et~al.}}{{}}} -\bibcite{hagman_early_2005}{{43}{2005}{{Hagman and Lukin}}{{}}} -\bibcite{heinz_simple_2010}{{44}{2010}{{Heinz et~al.}}{{}}} -\bibcite{henikoff_histone_2015}{{45}{2015}{{Henikoff and Smith}}{{}}} -\bibcite{hertz_identification_1990}{{46}{1990}{{Hertz et~al.}}{{}}} -\bibcite{hon_chromasig:_2008}{{47}{2008}{{Hon et~al.}}{{}}} -\bibcite{hyun_writing_2017}{{48}{2017}{{Hyun et~al.}}{{}}} -\bibcite{ioshikhes_variety_2011}{{49}{2011}{{Ioshikhes et~al.}}{{}}} -\bibcite{isakova_smile-seq_2017}{{50}{2017}{{Isakova et~al.}}{{}}} -\bibcite{jolma_multiplexed_2010}{{51}{2010}{{Jolma et~al.}}{{}}} -\bibcite{jolma_methods_2011-2}{{52}{2011}{{Jolma and Taipale}}{{}}} 
-\bibcite{jolma_dna-binding_2013}{{53}{2013}{{Jolma et~al.}}{{}}} -\bibcite{kent_blatblast-like_2002}{{54}{2002}{{Kent}}{{}}} -\bibcite{khan_jaspar_2018}{{55}{2018}{{Khan et~al.}}{{}}} -\bibcite{kiselev_sc3:_2017}{{56}{2017}{{Kiselev et~al.}}{{}}} -\bibcite{kouzarides_chromatin_2007}{{57}{2007}{{Kouzarides}}{{}}} -\bibcite{kulakovskiy_hocomoco:_2018}{{58}{2018}{{Kulakovskiy et~al.}}{{}}} -\bibcite{kulakovskiy_hocomoco:_2016}{{59}{2016}{{Kulakovskiy et~al.}}{{}}} -\bibcite{kundaje_ubiquitous_2012}{{60}{2012}{{Kundaje et~al.}}{{}}} -\bibcite{kurotaki_transcriptional_2017}{{61}{2017}{{Kurotaki et~al.}}{{}}} -\bibcite{langmead_fast_2012}{{62}{2012}{{Langmead and Salzberg}}{{}}} -\bibcite{langmead_ultrafast_2009}{{63}{2009}{{Langmead et~al.}}{{}}} -\bibcite{li_sequence_2009}{{64}{2009}{{Li et~al.}}{{}}} -\bibcite{li_identification_2019}{{65}{2019}{{Li et~al.}}{{}}} -\bibcite{lizio_gateways_2015}{{66}{2015}{{Lizio et~al.}}{{}}} -\bibcite{losada_cohesin_2014}{{67}{2014}{{Losada}}{{}}} -\bibcite{langst_chromatin_2015}{{68}{2015}{{L\IeC {\"a}ngst and Manelyte}}{{}}} -\bibcite{maerkl_systems_2007}{{69}{2007}{{Maerkl and Quake}}{{}}} -\bibcite{maier_early_2004}{{70}{2004}{{Maier et~al.}}{{}}} -\bibcite{marsland_machine_2015-1}{{71}{2015}{{Marsland}}{{}}} -\bibcite{mathelier_jaspar_2014}{{72}{2014}{{Mathelier et~al.}}{{}}} -\bibcite{mcginty_robert_k._and_tan_song_fundamentals_2014}{{73}{2014}{{McGinty Robert K. 
and Tan Song}}{{}}} -\bibcite{nair_probabilistic_2014}{{74}{2014}{{Nair et~al.}}{{}}} -\bibcite{neph_expansive_2012}{{75}{2012}{{Neph et~al.}}{{}}} -\bibcite{nielsen_catchprofiles:_2012}{{76}{2012}{{Nielsen et~al.}}{{}}} -\bibcite{ong_ctcf:_2014}{{77}{2014}{{Ong and Corces}}{{}}} -\bibcite{orenstein_comparative_2014}{{78}{2014}{{Orenstein and Shamir}}{{}}} -\bibcite{ou_motifstack_2018}{{79}{2018}{{Ou et~al.}}{{}}} -\bibcite{pizzi_fast_2008}{{80}{2008}{{Pizzi and Ukkonen}}{{}}} -\bibcite{pollard_detection_2010}{{81}{2010}{{Pollard et~al.}}{{}}} -\bibcite{quinlan_bedtools:_2010}{{82}{2010}{{Quinlan and Hall}}{{}}} -\bibcite{raney_track_2014}{{83}{2014}{{Raney et~al.}}{{}}} -\bibcite{rico_comparative_2017}{{84}{2017}{{Rico et~al.}}{{}}} -\bibcite{roadmap_epigenomics_consortium_integrative_2015}{{85}{2015}{{{Roadmap Epigenomics Consortium} et~al.}}{{}}} -\bibcite{rustici_arrayexpress_2013}{{86}{2013}{{Rustici et~al.}}{{}}} -\bibcite{schones_statistical_2007}{{87}{2007}{{Schones et~al.}}{{}}} -\bibcite{schutz_mamot:_2008}{{88}{2008}{{Sch\IeC {\"u}tz and Delorenzi}}{{}}} -\bibcite{siepel_evolutionarily_2005}{{89}{2005}{{Siepel et~al.}}{{}}} -\bibcite{soufi_pioneer_2015}{{90}{2015}{{Soufi et~al.}}{{}}} -\bibcite{stedman_cohesins_2008}{{91}{2008}{{Stedman et~al.}}{{}}} -\bibcite{trifonov_cracking_2011}{{92}{2011}{{Trifonov}}{{}}} -\bibcite{turatsinze_using_2008}{{93}{2008}{{Turatsinze et~al.}}{{}}} -\bibcite{vierstra_genomic_2016}{{94}{2016}{{Vierstra and Stamatoyannopoulos}}{{}}} -\bibcite{voss_dynamic_2014}{{95}{2014}{{Voss and Hager}}{{}}} -\bibcite{wang_sequence_2012}{{96}{2012}{{Wang et~al.}}{{}}} -\bibcite{weirauch_evaluation_2013}{{97}{2013}{{Weirauch et~al.}}{{}}} -\bibcite{wu_biogps:_2016}{{98}{2016}{{Wu et~al.}}{{}}} -\bibcite{zaret_pioneer_2011}{{99}{2011}{{Zaret and Carroll}}{{}}} -\bibcite{zhang_canonical_2014}{{100}{2014}{{Zhang et~al.}}{{}}} -\bibcite{zhao_tred:_2005}{{101}{2005}{{Zhao et~al.}}{{}}} -\bibcite{zhao_inferring_2009}{{102}{2009}{{Zhao 
et~al.}}{{}}} -\bibcite{zhou_charting_2011}{{103}{2011}{{Zhou et~al.}}{{}}} -\@writefile{toc}{\contentsline {chapter}{Bibliography}{120}{appendix*.63}} +\bibcite{dreos_influence_2016}{{28}{2016}{{Dreos et~al.}}{{}}} +\bibcite{dreos_epd_2013}{{29}{2013}{{Dreos et~al.}}{{}}} +\bibcite{dreos_eukaryotic_2017}{{30}{2017}{{Dreos et~al.}}{{}}} +\bibcite{dreos_mga_2018}{{31}{2018}{{Dreos et~al.}}{{}}} +\bibcite{dreos_eukaryotic_2015}{{32}{2015}{{Dreos et~al.}}{{}}} +\bibcite{fan_characterizing_2016}{{33}{2016}{{Fan et~al.}}{{}}} +\bibcite{fu_motifviz:_2004}{{34}{2004}{{Fu et~al.}}{{}}} +\bibcite{fu_insulator_2008}{{35}{2008}{{Fu et~al.}}{{}}} +\bibcite{gaffney_controls_2012}{{36}{2012}{{Gaffney et~al.}}{{}}} +\bibcite{gerstein_architecture_2012}{{37}{2012}{{Gerstein et~al.}}{{}}} +\bibcite{ghirlando_ctcf:_2016}{{38}{2016}{{Ghirlando and Felsenfeld}}{{}}} +\bibcite{gonzalez-blas_cistopic:_2019}{{39}{2019}{{Gonz\IeC {\'a}lez-Blas et~al.}}{{}}} +\bibcite{grant_fimo:_2011}{{40}{2011}{{Grant et~al.}}{{}}} +\bibcite{grossman_positional_2018}{{41}{2018}{{Grossman et~al.}}{{}}} +\bibcite{groux_spar-k:_2019}{{42}{2019}{{Groux and Bucher}}{{}}} +\bibcite{guo_high_2012}{{43}{2012}{{Guo et~al.}}{{}}} +\bibcite{hagman_early_2005}{{44}{2005}{{Hagman and Lukin}}{{}}} +\bibcite{heinz_simple_2010}{{45}{2010}{{Heinz et~al.}}{{}}} +\bibcite{henikoff_histone_2015}{{46}{2015}{{Henikoff and Smith}}{{}}} +\bibcite{hertz_identification_1990}{{47}{1990}{{Hertz et~al.}}{{}}} +\bibcite{hon_chromasig:_2008}{{48}{2008}{{Hon et~al.}}{{}}} +\bibcite{hyun_writing_2017}{{49}{2017}{{Hyun et~al.}}{{}}} +\bibcite{ioshikhes_variety_2011}{{50}{2011}{{Ioshikhes et~al.}}{{}}} +\bibcite{isakova_smile-seq_2017}{{51}{2017}{{Isakova et~al.}}{{}}} +\bibcite{jiang_nucleosome_2009}{{52}{2009}{{Jiang and Pugh}}{{}}} +\bibcite{jolma_multiplexed_2010}{{53}{2010}{{Jolma et~al.}}{{}}} +\bibcite{jolma_methods_2011-2}{{54}{2011}{{Jolma and Taipale}}{{}}} +\bibcite{jolma_dna-binding_2013}{{55}{2013}{{Jolma et~al.}}{{}}} 
+\bibcite{kent_blatblast-like_2002}{{56}{2002}{{Kent}}{{}}} +\bibcite{khan_jaspar_2018}{{57}{2018}{{Khan et~al.}}{{}}} +\bibcite{kiselev_sc3:_2017}{{58}{2017}{{Kiselev et~al.}}{{}}} +\bibcite{kouzarides_chromatin_2007}{{59}{2007}{{Kouzarides}}{{}}} +\bibcite{kubik_nucleosome_2015}{{60}{2015}{{Kubik et~al.}}{{}}} +\bibcite{kulakovskiy_hocomoco:_2018}{{61}{2018}{{Kulakovskiy et~al.}}{{}}} +\bibcite{kulakovskiy_hocomoco:_2016}{{62}{2016}{{Kulakovskiy et~al.}}{{}}} +\bibcite{kundaje_ubiquitous_2012}{{63}{2012}{{Kundaje et~al.}}{{}}} +\bibcite{kurotaki_transcriptional_2017}{{64}{2017}{{Kurotaki et~al.}}{{}}} +\bibcite{langmead_fast_2012}{{65}{2012}{{Langmead and Salzberg}}{{}}} +\bibcite{langmead_ultrafast_2009}{{66}{2009}{{Langmead et~al.}}{{}}} +\bibcite{li_sequence_2009}{{67}{2009}{{Li et~al.}}{{}}} +\bibcite{li_identification_2019}{{68}{2019}{{Li et~al.}}{{}}} +\bibcite{lizio_gateways_2015}{{69}{2015}{{Lizio et~al.}}{{}}} +\bibcite{losada_cohesin_2014}{{70}{2014}{{Losada}}{{}}} +\bibcite{langst_chromatin_2015}{{71}{2015}{{L\IeC {\"a}ngst and Manelyte}}{{}}} +\bibcite{maerkl_systems_2007}{{72}{2007}{{Maerkl and Quake}}{{}}} +\bibcite{maier_early_2004}{{73}{2004}{{Maier et~al.}}{{}}} +\bibcite{marsland_machine_2015-1}{{74}{2015}{{Marsland}}{{}}} +\bibcite{mathelier_jaspar_2014}{{75}{2014}{{Mathelier et~al.}}{{}}} +\bibcite{mcginty_robert_k._and_tan_song_fundamentals_2014}{{76}{2014}{{McGinty Robert K. 
and Tan Song}}{{}}} +\bibcite{nair_probabilistic_2014}{{77}{2014}{{Nair et~al.}}{{}}} +\bibcite{neph_expansive_2012}{{78}{2012}{{Neph et~al.}}{{}}} +\bibcite{nielsen_catchprofiles:_2012}{{79}{2012}{{Nielsen et~al.}}{{}}} +\bibcite{ong_ctcf:_2014}{{80}{2014}{{Ong and Corces}}{{}}} +\bibcite{orenstein_comparative_2014}{{81}{2014}{{Orenstein and Shamir}}{{}}} +\bibcite{ou_motifstack_2018}{{82}{2018}{{Ou et~al.}}{{}}} +\bibcite{pizzi_fast_2008}{{83}{2008}{{Pizzi and Ukkonen}}{{}}} +\bibcite{pollard_detection_2010}{{84}{2010}{{Pollard et~al.}}{{}}} +\bibcite{quinlan_bedtools:_2010}{{85}{2010}{{Quinlan and Hall}}{{}}} +\bibcite{raney_track_2014}{{86}{2014}{{Raney et~al.}}{{}}} +\bibcite{rico_comparative_2017}{{87}{2017}{{Rico et~al.}}{{}}} +\bibcite{roadmap_epigenomics_consortium_integrative_2015}{{88}{2015}{{{Roadmap Epigenomics Consortium} et~al.}}{{}}} +\bibcite{rustici_arrayexpress_2013}{{89}{2013}{{Rustici et~al.}}{{}}} +\bibcite{schones_dynamic_2008}{{90}{2008}{{Schones et~al.}}{{}}} +\bibcite{schones_statistical_2007}{{91}{2007}{{Schones et~al.}}{{}}} +\bibcite{schutz_mamot:_2008}{{92}{2008}{{Sch\IeC {\"u}tz and Delorenzi}}{{}}} +\bibcite{siepel_evolutionarily_2005}{{93}{2005}{{Siepel et~al.}}{{}}} +\bibcite{soufi_pioneer_2015}{{94}{2015}{{Soufi et~al.}}{{}}} +\bibcite{stedman_cohesins_2008}{{95}{2008}{{Stedman et~al.}}{{}}} +\bibcite{trifonov_cracking_2011}{{96}{2011}{{Trifonov}}{{}}} +\bibcite{turatsinze_using_2008}{{97}{2008}{{Turatsinze et~al.}}{{}}} +\bibcite{vierstra_genomic_2016}{{98}{2016}{{Vierstra and Stamatoyannopoulos}}{{}}} +\bibcite{voss_dynamic_2014}{{99}{2014}{{Voss and Hager}}{{}}} +\bibcite{wang_sequence_2012}{{100}{2012}{{Wang et~al.}}{{}}} +\bibcite{weirauch_evaluation_2013}{{101}{2013}{{Weirauch et~al.}}{{}}} +\bibcite{west_nucleosomal_2014}{{102}{2014}{{West et~al.}}{{}}} +\bibcite{wu_biogps:_2016}{{103}{2016}{{Wu et~al.}}{{}}} +\bibcite{zaret_pioneer_2011}{{104}{2011}{{Zaret and Carroll}}{{}}} 
+\bibcite{zhang_canonical_2014}{{105}{2014}{{Zhang et~al.}}{{}}} +\bibcite{zhao_tred:_2005}{{106}{2005}{{Zhao et~al.}}{{}}} +\bibcite{zhao_inferring_2009}{{107}{2009}{{Zhao et~al.}}{{}}} +\bibcite{zhou_charting_2011}{{108}{2011}{{Zhou et~al.}}{{}}} +\@writefile{toc}{\contentsline {chapter}{Bibliography}{122}{appendix*.64}} \@setckpt{tail/biblio}{ -\setcounter{page}{121} +\setcounter{page}{123} \setcounter{equation}{0} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{1} \setcounter{section}{1} \setcounter{subsection}{0} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{19} \setcounter{table}{0} -\setcounter{NAT@ctr}{103} +\setcounter{NAT@ctr}{108} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} \setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{39} \setcounter{algocfline}{3} \setcounter{algocfproc}{3} \setcounter{algocf}{3} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} } diff --git a/tail/bibliography.bib b/tail/bibliography.bib index d53d213..bbb4569 100644 --- a/tail/bibliography.bib +++ b/tail/bibliography.bib @@ -1,5493 +1,5548 @@ @article{nozaki_tight_2011, title = {Tight associations between transcription promoter type and epigenetic variation in histone positioning and modification}, volume = {12}, copyright = {2011 Nozaki et al; licensee BioMed Central Ltd.}, issn = {1471-2164}, url = 
{http://www.biomedcentral.com/1471-2164/12/416/abstract}, doi = {10.1186/1471-2164-12-416}, abstract = {PMID: 21846408}, language = {en}, number = {1}, urldate = {2015-04-17}, journal = {BMC Genomics}, author = {Nozaki, Tadasu and Yachie, Nozomu and Ogawa, Ryu and Kratz, Anton and Saito, Rintaro and Tomita, Masaru}, month = aug, year = {2011}, pmid = {21846408}, pages = {416}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5NVIJMZH/Nozaki et al. - 2011 - Tight associations between transcription promoter .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4R8FAQ3F/416.html:text/html} } @article{boyle_high-resolution_2008, title = {High-{Resolution} {Mapping} and {Characterization} of {Open} {Chromatin} across the {Genome}}, volume = {132}, issn = {0092-8674}, url = {http://www.sciencedirect.com/science/article/pii/S0092867407016133}, doi = {10.1016/j.cell.2007.12.014}, abstract = {Summary Mapping DNase I hypersensitive (HS) sites is an accurate method of identifying the location of genetic regulatory elements, including promoters, enhancers, silencers, insulators, and locus control regions. We employed high-throughput sequencing and whole-genome tiled array strategies to identify DNase I HS sites within human primary CD4+ T cells. Combining these two technologies, we have created a comprehensive and accurate genome-wide open chromatin map. Surprisingly, only 16\%–21\% of the identified 94,925 DNase I HS sites are found in promoters or first exons of known genes, but nearly half of the most open sites are in these regions. In conjunction with expression, motif, and chromatin immunoprecipitation data, we find evidence of cell-type-specific characteristics, including the ability to identify transcription start sites and locations of different chromatin marks utilized in these cells. 
In addition, and unexpectedly, our analyses have uncovered detailed features of nucleosome structure.}, number = {2}, urldate = {2015-09-11}, journal = {Cell}, author = {Boyle, Alan P. and Davis, Sean and Shulha, Hennady P. and Meltzer, Paul and Margulies, Elliott H. and Weng, Zhiping and Furey, Terrence S. and Crawford, Gregory E.}, month = jan, year = {2008}, keywords = {DNA}, pages = {311--322}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/AT3JAG9P/Boyle et al. - 2008 - High-Resolution Mapping and Characterization of Op.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/W2KDFIGZ/S0092867407016133.html:text/html} } @article{barski_high-resolution_2007, title = {High-{Resolution} {Profiling} of {Histone} {Methylations} in the {Human} {Genome}}, volume = {129}, issn = {0092-8674}, url = {http://www.sciencedirect.com/science/article/pii/S0092867407006009}, doi = {10.1016/j.cell.2007.05.009}, abstract = {Histone modifications are implicated in influencing gene expression. We have generated high-resolution maps for the genome-wide distribution of 20 histone lysine and arginine methylations as well as histone variant H2A.Z, RNA polymerase II, and the insulator binding protein CTCF across the human genome using the Solexa 1G sequencing technology. Typical patterns of histone methylations exhibited at promoters, insulators, enhancers, and transcribed regions are identified. The monomethylations of H3K27, H3K9, H4K20, H3K79, and H2BK5 are all linked to gene activation, whereas trimethylations of H3K27, H3K9, and H3K79 are linked to repression. H2A.Z associates with functional regulatory elements, and CTCF marks boundaries of histone methylation domains. Chromosome banding patterns are correlated with unique patterns of histone modifications. Chromosome breakpoints detected in T cell cancers frequently reside in chromatin regions associated with H3K4 methylations. 
Our data provide new insights into the function of histone methylation and chromatin organization in genome function.}, number = {4}, urldate = {2014-03-05}, journal = {Cell}, author = {Barski, Artem and Cuddapah, Suresh and Cui, Kairong and Roh, Tae-Young and Schones, Dustin E. and Wang, Zhibin and Wei, Gang and Chepelev, Iouri and Zhao, Keji}, month = may, year = {2007}, keywords = {DNA, PROTEINS, SYSBIO}, pages = {823--837}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QUNNP84A/Barski et al. - 2007 - High-Resolution Profiling of Histone Methylations .pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/AD6CEICR/S0092867407006009.html:text/html} } @article{bailey_practical_2013, title = {Practical {Guidelines} for the {Comprehensive} {Analysis} of {ChIP}-seq {Data}}, volume = {9}, url = {http://dx.doi.org/10.1371/journal.pcbi.1003326}, doi = {10.1371/journal.pcbi.1003326}, abstract = {Mapping the chromosomal locations of transcription factors, nucleosomes, histone modifications, chromatin remodeling enzymes, chaperones, and polymerases is one of the key tasks of modern biology, as evidenced by the Encyclopedia of DNA Elements (ENCODE) Project. To this end, chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-seq) is the standard methodology. Mapping such protein-DNA interactions in vivo using ChIP-seq presents multiple challenges not only in sample preparation and sequencing but also for computational analysis. Here, we present step-by-step guidelines for the computational analysis of ChIP-seq data. We address all the major steps in the analysis of ChIP-seq data: sequencing depth selection, quality checking, mapping, data normalization, assessment of reproducibility, peak calling, differential binding analysis, controlling the false discovery rate, peak annotation, visualization, and motif analysis. 
At each step in our guidelines we discuss some of the software tools most frequently used. We also highlight the challenges and problems associated with each step in ChIP-seq data analysis. We present a concise workflow for the analysis of ChIP-seq data in Figure 1 that complements and expands on the recommendations of the ENCODE and modENCODE projects. Each step in the workflow is described in detail in the following sections.}, number = {11}, urldate = {2014-05-22}, journal = {PLoS Comput Biol}, author = {Bailey, Timothy and Krajewski, Pawel and Ladunga, Istvan and Lefebvre, Celine and Li, Qunhua and Liu, Tao and Madrigal, Pedro and Taslim, Cenny and Zhang, Jie}, month = nov, year = {2013}, pages = {e1003326}, file = {PLoS Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4FGJAV84/Bailey et al. - 2013 - Practical Guidelines for the Comprehensive Analysi.pdf:application/pdf;PLoS Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4SF6WQ4C/infodoi10.1371journal.pcbi.html:text/html} } @article{do_what_2008, title = {What is the expectation maximization algorithm?}, volume = {26}, copyright = {© 2008 Nature Publishing Group}, issn = {1087-0156}, url = {http://www.nature.com/nbt/journal/v26/n8/full/nbt1406.html#close}, doi = {10.1038/nbt1406}, abstract = {The expectation maximization algorithm arises in many computational biology applications that involve probabilistic models. What is it good for, and how does it work?}, language = {en}, number = {8}, urldate = {2015-04-21}, journal = {Nature Biotechnology}, author = {Do, Chuong B. 
and Batzoglou, Serafim}, month = aug, year = {2008}, pages = {897--899}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5FGUS8D9/Do et Batzoglou - 2008 - What is the expectation maximization algorithm.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FBND2UBT/nbt1406.html:text/html} } @article{tran_survey_2014, title = {A survey of motif finding {Web} tools for detecting binding site motifs in {ChIP}-{Seq} data}, volume = {9}, issn = {1745-6150}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4022013/}, doi = {10.1186/1745-6150-9-4}, abstract = {Abstract ChIP-Seq (chromatin immunoprecipitation sequencing) has provided the advantage for finding motifs as ChIP-Seq experiments narrow down the motif finding to binding site locations. Recent motif finding tools facilitate the motif detection by providing user-friendly Web interface. In this work, we reviewed nine motif finding Web tools that are capable for detecting binding site motifs in ChIP-Seq data. We showed each motif finding Web tool has its own advantages for detecting motifs that other tools may not discover. We recommended the users to use multiple motif finding Web tools that implement different algorithms for obtaining significant motifs, overlapping resemble motifs, and non-overlapping motifs. Finally, we provided our suggestions for future development of motif finding Web tool that better assists researchers for finding motifs in ChIP-Seq data. Reviewers This article was reviewed by Prof. Sandor Pongor, Dr. Yuriy Gusev, and Dr. Shyam Prabhakar (nominated by Prof. 
Limsoon Wong).}, urldate = {2015-04-09}, journal = {Biology Direct}, author = {Tran, Ngoc Tam L and Huang, Chun-Hsi}, month = feb, year = {2014}, pmid = {24555784}, pmcid = {PMC4022013}, pages = {4}, file = {PubMed Central Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IWAQUBVS/Tran et Huang - 2014 - A survey of motif finding Web tools for detecting .pdf:application/pdf} } @article{deng_yin_2010, title = {Yin {Yang} 1}, volume = {1}, issn = {2154-1264}, url = {http://dx.doi.org/10.4161/trns.1.2.12375}, doi = {10.4161/trns.1.2.12375}, abstract = {As a transcription factor, Yin Yang 1 (YY1) regulates the transcription of a dazzling list of genes and the number of its targets still mounts. Recent studies revealed that YY1 possesses functions independent of its DNA binding activity and its regulatory role in tumorigenesis has started to emerge.}, number = {2}, urldate = {2015-04-09}, journal = {Transcription}, author = {Deng, Zhiyong and Cao, Paul and Wan, Mei Mei and Sui, Guangchao}, month = sep, year = {2010}, pages = {81--84}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/NX4NGAC2/Deng et al. - 2010 - Yin Yang 1.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UXX3CF2A/trns.1.2.html:text/html} } @article{wang_sequence_2012, title = {Sequence features and chromatin structure around the genomic regions bound by 119 human transcription factors}, volume = {22}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/22/9/1798}, doi = {10.1101/gr.139105.112}, abstract = {Chromatin immunoprecipitation coupled with high-throughput sequencing (ChIP-seq) has become the dominant technique for mapping transcription factor (TF) binding regions genome-wide. We performed an integrative analysis centered around 457 ChIP-seq data sets on 119 human TFs generated by the ENCODE Consortium. 
We identified highly enriched sequence motifs in most data sets, revealing new motifs and validating known ones. The motif sites (TF binding sites) are highly conserved evolutionarily and show distinct footprints upon DNase I digestion. We frequently detected secondary motifs in addition to the canonical motifs of the TFs, indicating tethered binding and cobinding between multiple TFs. We observed significant position and orientation preferences between many cobinding TFs. Genes specifically expressed in a cell line are often associated with a greater occurrence of nearby TF binding in that cell line. We observed cell-line–specific secondary motifs that mediate the binding of the histone deacetylase HDAC2 and the enhancer-binding protein EP300. TF binding sites are located in GC-rich, nucleosome-depleted, and DNase I sensitive regions, flanked by well-positioned nucleosomes, and many of these features show cell type specificity. The GC-richness may be beneficial for regulating TF binding because, when unoccupied by a TF, these regions are occupied by nucleosomes in vivo. We present the results of our analysis in a TF-centric web repository Factorbook (http://factorbook.org) and will continually update this repository as more ENCODE data are generated.}, language = {en}, number = {9}, urldate = {2015-04-21}, journal = {Genome Research}, author = {Wang, Jie and Zhuang, Jiali and Iyer, Sowmya and Lin, XinYing and Whitfield, Troy W. and Greven, Melissa C. and Pierce, Brian G. and Dong, Xianjun and Kundaje, Anshul and Cheng, Yong and Rando, Oliver J. and Birney, Ewan and Myers, Richard M. and Noble, William S. and Snyder, Michael and Weng, Zhiping}, month = sep, year = {2012}, pmid = {22955990}, pages = {1798--1812}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ZG5GCSXZ/Wang et al. 
- 2012 - Sequence features and chromatin structure around t.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FDW4W9EP/1798.html:text/html;Wang et al. 2012 - Supplemental_materials.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BKCK28GC/Wang et al. 2012 - Supplemental_materials.pdf:application/pdf} } @article{stormo_determining_2010, title = {Determining the specificity of protein–{DNA} interactions}, volume = {11}, copyright = {© 2010 Nature Publishing Group}, issn = {1471-0056}, url = {http://www.nature.com/nrg/journal/v11/n11/full/nrg2845.html}, doi = {10.1038/nrg2845}, abstract = {Proteins, such as many transcription factors, that bind to specific DNA sequences are essential for the proper regulation of gene expression. Identifying the specific sequences that each factor binds can help to elucidate regulatory networks within cells and how genetic variation can cause disruption of normal gene expression, which is often associated with disease. Traditional methods for determining the specificity of DNA-binding proteins are slow and laborious, but several new high-throughput methods can provide comprehensive binding information much more rapidly. Combined with in vivo determinations of transcription factor binding locations, this information provides more detailed views of the regulatory circuitry of cells and the effects of variation on gene expression.}, language = {en}, number = {11}, urldate = {2015-04-09}, journal = {Nature Reviews Genetics}, author = {Stormo, Gary D. 
and Zhao, Yue}, month = nov, year = {2010}, pages = {751--760}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/WTQGGPQU/Stormo et Zhao - 2010 - Determining the specificity of protein–DNA interac.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KURBDEHF/nrg2845.html:text/html} } @article{pepke_computation_2009, title = {Computation for {ChIP}-seq and {RNA}-seq studies}, volume = {6}, copyright = {© 2009 Nature Publishing Group}, issn = {1548-7091}, url = {http://www.nature.com/nmeth/journal/v6/n11s/full/nmeth.1371.html}, doi = {10.1038/nmeth.1371}, abstract = {Genome-wide measurements of protein-DNA interactions and transcriptomes are increasingly done by deep DNA sequencing methods (ChIP-seq and RNA-seq). The power and richness of these counting-based measurements comes at the cost of routinely handling tens to hundreds of millions of reads. Whereas early adopters necessarily developed their own custom computer code to analyze the first ChIP-seq and RNA-seq datasets, a new generation of more sophisticated algorithms and software tools are emerging to assist in the analysis phase of these projects. Here we describe the multilayered analyses of ChIP-seq and RNA-seq datasets, discuss the software packages currently available to perform tasks at each layer and describe some upcoming challenges and features for future analysis tools. We also discuss how software choices and uses are affected by specific aspects of the underlying biology and data structure, including genome size, positional clustering of transcription factor binding sites, transcript discovery and expression quantification.}, language = {en}, number = {11s}, urldate = {2015-04-09}, journal = {Nature Methods}, author = {Pepke, Shirley and Wold, Barbara and Mortazavi, Ali}, month = nov, year = {2009}, pages = {S22--S32}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/NUI26MUX/Pepke et al. 
- 2009 - Computation for ChIP-seq and RNA-seq studies.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TDGAAFHM/nmeth.1371.html:text/html} } @article{wilbanks_evaluation_2010, title = {Evaluation of {Algorithm} {Performance} in {ChIP}-{Seq} {Peak} {Detection}}, volume = {5}, url = {http://dx.doi.org/10.1371/journal.pone.0011471}, doi = {10.1371/journal.pone.0011471}, abstract = {Next-generation DNA sequencing coupled with chromatin immunoprecipitation (ChIP-seq) is revolutionizing our ability to interrogate whole genome protein-DNA interactions. Identification of protein binding sites from ChIP-seq data has required novel computational tools, distinct from those used for the analysis of ChIP-Chip experiments. The growing popularity of ChIP-seq spurred the development of many different analytical programs (at last count, we noted 31 open source methods), each with some purported advantage. Given that the literature is dense and empirical benchmarking challenging, selecting an appropriate method for ChIP-seq analysis has become a daunting task. Herein we compare the performance of eleven different peak calling programs on common empirical, transcription factor datasets and measure their sensitivity, accuracy and usability. Our analysis provides an unbiased critical assessment of available technologies, and should assist researchers in choosing a suitable tool for handling ChIP-seq data.}, number = {7}, urldate = {2015-04-22}, journal = {PLoS ONE}, author = {Wilbanks, Elizabeth G. 
and Facciotti, Marc T.}, month = jul, year = {2010}, pages = {e11471}, file = {PLoS Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RWE7B96I/Wilbanks et Facciotti - 2010 - Evaluation of Algorithm Performance in ChIP-Seq Pe.pdf:application/pdf} } @article{landt_chip-seq_2012, title = {{ChIP}-seq guidelines and practices of the {ENCODE} and {modENCODE} consortia}, volume = {22}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/22/9/1813}, doi = {10.1101/gr.136184.111}, abstract = {Chromatin immunoprecipitation (ChIP) followed by high-throughput DNA sequencing (ChIP-seq) has become a valuable and widely used approach for mapping the genomic location of transcription-factor binding and histone modifications in living cells. Despite its widespread use, there are considerable differences in how these experiments are conducted, how the results are scored and evaluated for quality, and how the data and metadata are archived for public use. These practices affect the quality and utility of any global ChIP experiment. Through our experience in performing ChIP-seq experiments, the ENCODE and modENCODE consortia have developed a set of working standards and guidelines for ChIP experiments that are updated routinely. The current guidelines address antibody validation, experimental replication, sequencing depth, data and metadata reporting, and data quality assessment. We discuss how ChIP quality, assessed in these ways, affects different uses of ChIP-seq data. All data sets used in the analysis have been deposited for public viewing and downloading at the ENCODE (http://encodeproject.org/ENCODE/) and modENCODE (http://www.modencode.org/) portals.}, language = {en}, number = {9}, urldate = {2015-04-16}, journal = {Genome Research}, author = {Landt, Stephen G. and Marinov, Georgi K. and Kundaje, Anshul and Kheradpour, Pouya and Pauli, Florencia and Batzoglou, Serafim and Bernstein, Bradley E. and Bickel, Peter and Brown, James B. 
and Cayting, Philip and Chen, Yiwen and DeSalvo, Gilberto and Epstein, Charles and Fisher-Aylor, Katherine I. and Euskirchen, Ghia and Gerstein, Mark and Gertz, Jason and Hartemink, Alexander J. and Hoffman, Michael M. and Iyer, Vishwanath R. and Jung, Youngsook L. and Karmakar, Subhradip and Kellis, Manolis and Kharchenko, Peter V. and Li, Qunhua and Liu, Tao and Liu, X. Shirley and Ma, Lijia and Milosavljevic, Aleksandar and Myers, Richard M. and Park, Peter J. and Pazin, Michael J. and Perry, Marc D. and Raha, Debasish and Reddy, Timothy E. and Rozowsky, Joel and Shoresh, Noam and Sidow, Arend and Slattery, Matthew and Stamatoyannopoulos, John A. and Tolstorukov, Michael Y. and White, Kevin P. and Xi, Simon and Farnham, Peggy J. and Lieb, Jason D. and Wold, Barbara J. and Snyder, Michael}, month = sep, year = {2012}, pmid = {22955991}, pages = {1813--1831}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4JW5NMSR/Landt et al. - 2012 - ChIP-seq guidelines and practices of the ENCODE an.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9IPS4QVU/1813.html:text/html} } @article{zimmermann_genomic_2010, series = {{RNA}: {From} {Sequence} to {Structure} and {Dynamics}}, title = {Genomic {SELEX}: {A} discovery tool for genomic aptamers}, volume = {52}, issn = {1046-2023}, shorttitle = {Genomic {SELEX}}, url = {http://www.sciencedirect.com/science/article/pii/S1046202310001581}, doi = {10.1016/j.ymeth.2010.06.004}, abstract = {Genomic SELEX is a discovery tool for genomic aptamers, which are genomically encoded functional domains in nucleic acid molecules that recognize and bind specific ligands. When combined with genomic libraries and using RNA-binding proteins as baits, Genomic SELEX used with high-throughput sequencing enables the discovery of genomic RNA aptamers and the identification of RNA–protein interaction networks. 
Here we describe how to construct and analyze genomic libraries, how to choose baits for selections, how to perform the selection procedure and finally how to analyze the enriched sequences derived from deep sequencing. As a control procedure, we recommend performing a “Neutral” SELEX experiment in parallel to the selection, omitting the selection step. This control experiment provides a background signal for comparison with the positively selected pool. We also recommend deep sequencing the initial library in order to facilitate the final in silico analysis of enrichment with respect to the initial levels. Counter selection procedures, using modified or inactive baits, allow strengthening the binding specificity of the winning selected sequences.}, number = {2}, urldate = {2015-06-25}, journal = {Methods}, author = {Zimmermann, Bob and Bilusic, Ivana and Lorenz, Christina and Schroeder, Renée}, month = oct, year = {2010}, pages = {125--132}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Z8JU4VG9/Zimmermann et al. - 2010 - Genomic SELEX A discovery tool for genomic aptame.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/WD9VZP7G/S1046202310001581.html:text/html} } @article{schwalie_co-binding_2013, title = {Co-binding by {YY}1 identifies the transcriptionally active, highly conserved set of {CTCF}-bound regions in primate genomes}, volume = {14}, copyright = {2013 Schwalie et al.; licensee BioMed Central Ltd.}, issn = {1465-6906}, url = {http://genomebiology.com/2013/14/12/R148/abstract}, doi = {10.1186/gb-2013-14-12-r148}, abstract = {PMID: 24380390}, language = {en}, number = {12}, urldate = {2015-06-26}, journal = {Genome Biology}, author = {Schwalie, Petra C. and Ward, Michelle C. and Cain, Carolyn E. and Faure, Andre J. and Gilad, Yoav and Odom, Duncan T. 
and Flicek, Paul}, month = dec, year = {2013}, pmid = {24380390}, pages = {R148}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/F284DM3W/Schwalie et al. - 2013 - Co-binding by YY1 identifies the transcriptionally.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/G3RIKXKT/R148.html:text/html} } @article{giraud_parallel_2011, series = {Follow-on of {ISPDC}'2009 and {HeteroPar}'2009}, title = {Parallel {Position} {Weight} {Matrices} algorithms}, volume = {37}, issn = {0167-8191}, url = {http://www.sciencedirect.com/science/article/pii/S0167819110001389}, doi = {10.1016/j.parco.2010.10.001}, abstract = {Position Weight Matrices (PWMs) are broadly used in computational biology. The basic problems, Scan and MultipleScan, aim to find all the occurrences of a given PWM or a set of PWMs in long sequences. Some other PWM tasks share a common NP-hard subproblem, ScoreDistribution. The existing algorithms rely on the enumeration on a large set of scores or words, and they are mostly not suitable for parallelization. We propose a new algorithm, BucketScoreDistribution, that is both very efficient and suitable for parallelization. We bound the error induced by this algorithm. 
We realized a GPU prototype for Scan, MultipleScan and BucketScoreDistribution with the CUDA libraries, and report for the different problems speedups larger than 10× on several Nvidia cards.}, number = {8}, urldate = {2015-07-09}, journal = {Parallel Computing}, author = {Giraud, Mathieu and Varré, Jean-Stéphane}, month = aug, year = {2011}, keywords = {Position Weight Matrices, P-value estimation, Pattern matching, Score distribution, Many-core architectures, GPU, bioinformatics}, pages = {466--478}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7RG5J9PV/Giraud et Varré - 2011 - Parallel Position Weight Matrices algorithms.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HHPD6IGI/S0167819110001389.html:text/html} } @article{wu_fast_2000, title = {Fast probabilistic analysis of sequence function using scoring matrices}, volume = {16}, issn = {1367-4803, 1460-2059}, url = {http://bioinformatics.oxfordjournals.org/content/16/3/233}, doi = {10.1093/bioinformatics/16.3.233}, abstract = {Motivation: We present techniques for increasing the speed of sequence analysis using scoring matrices. Our techniques are based on calculating, for a given scoring matrix, the quantile function, which assigns a probability, or p, value to each segmental score. Our techniques also permit the user to specify a p threshold to indicate the desired trade-off between sensitivity and speed for a particular sequence analysis. The resulting increase in speed should allow scoring matrices to be used more widely in large-scale sequencing and annotation projects. Results: We develop three techniques for increasing the speed of sequence analysis: probability filtering, lookahead scoring, and permuted lookahead scoring. In probability filtering, we compute the score threshold that corresponds to the user-specified p threshold. 
We use the score threshold to limit the number of segments that are retained in the search process. In lookahead scoring, we test intermediate scores to determine whether they will possibly exceed the score threshold. In permuted lookahead scoring, we score each segment in a particular order designed to maximize the likelihood of early termination. Our two lookahead scoring techniques reduce substantially the number of residues that must be examined. The fraction of residues examined ranges from 62 to 6\%, depending on the p threshold chosen by the user. These techniques permit sequence analysis with scoring matrices at speeds that are several times faster than existing programs. On a database of 12 177 alignment blocks, our techniques permit sequence analysis at a speed of 225 residues/s for a p threshold of $10^{-6}$, and 541 residues/s for a p threshold of $10^{-20}$. In order to compute the quantile function, we may use either an independence assumption or a Markov assumption. We measure the effect of first- and second-order Markov assumptions and find that they tend to raise the p value of segments, when compared with the independence assumption, by average ratios of 1.30 and 1.69, respectively. We also compare our technique with the empirical 99.5th percentile scores compiled in the BLOCKSPLUS database, and find that they correspond on average to a p value of $1.5 \times 10^{-5}$. Availability: The techniques described above are implemented in a software package called EMATRIX. This package is available from the authors for free academic use or for licensed commercial use. The EMATRIX set of programs is also available on the Internet at http://motif.stanford.edu/ematrix.}, language = {en}, number = {3}, urldate = {2015-07-24}, journal = {Bioinformatics}, author = {Wu, Thomas D. and Nevill-Manning, Craig G. 
and Brutlag, Douglas L.}, month = mar, year = {2000}, pmid = {10869016}, pages = {233--244}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TBQ8WNZW/Wu et al. - 2000 - Fast probabilistic analysis of sequence function u.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GHRVXN4S/233.html:text/html} } @article{ioshikhes_nucleosome_2006, title = {Nucleosome positions predicted through comparative genomics}, volume = {38}, copyright = {© 2006 Nature Publishing Group}, issn = {1061-4036}, url = {http://www.nature.com/ng/journal/v38/n10/full/ng1878.html}, doi = {10.1038/ng1878}, abstract = {DNA sequence has long been recognized as an important contributor to nucleosome positioning, which has the potential to regulate access to genes. The extent to which the nucleosomal architecture at promoters is delineated by the underlying sequence is now being worked out. Here we use comparative genomics to report a genome-wide map of nucleosome positioning sequences (NPSs) located in the vicinity of all Saccharomyces cerevisiae genes. We find that the underlying DNA sequence provides a very good predictor of nucleosome locations that have been experimentally mapped to a small fraction of the genome. Notably, distinct classes of genes possess characteristic arrangements of NPSs that may be important for their regulation. In particular, genes that have a relatively compact NPS arrangement over the promoter region tend to have a TATA box buried in an NPS and tend to be highly regulated by chromatin modifying and remodeling factors.}, language = {en}, number = {10}, urldate = {2015-07-24}, journal = {Nature Genetics}, author = {Ioshikhes, Ilya P. and Albert, Istvan and Zanton, Sara J. and Pugh, B. Franklin}, month = oct, year = {2006}, pages = {1210--1215}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MRIMS7MH/Ioshikhes et al. 
- 2006 - Nucleosome positions predicted through comparative.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KCSRRJC8/ng1878.html:text/html} } @article{thurman_accessible_2012, title = {The accessible chromatin landscape of the human genome}, volume = {489}, copyright = {© 2012 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v489/n7414/full/nature11232.html}, doi = {10.1038/nature11232}, abstract = {DNase I hypersensitive sites (DHSs) are markers of regulatory DNA and have underpinned the discovery of all classes of cis-regulatory elements including enhancers, promoters, insulators, silencers and locus control regions. Here we present the first extensive map of human DHSs identified through genome-wide profiling in 125 diverse cell and tissue types. We identify {\textasciitilde}2.9 million DHSs that encompass virtually all known experimentally validated cis-regulatory sequences and expose a vast trove of novel elements, most with highly cell-selective regulation. Annotating these elements using ENCODE data reveals novel relationships between chromatin accessibility, transcription, DNA methylation and regulatory factor occupancy patterns. We connect {\textasciitilde}580,000 distal DHSs with their target promoters, revealing systematic pairing of different classes of distal DHSs and specific promoter types. Patterning of chromatin accessibility at many regulatory regions is organized with dozens to hundreds of co-activated elements, and the transcellular DNase I sensitivity pattern at a given region can predict cell-type-specific functional behaviours. The DHS landscape shows signatures of recent functional evolutionary constraint. 
However, the DHS compartment in pluripotent and immortalized cells exhibits higher mutation rates than that in highly differentiated cells, exposing an unexpected link between chromatin accessibility, proliferative potential and patterns of human variation.}, language = {en}, number = {7414}, urldate = {2015-09-11}, journal = {Nature}, author = {Thurman, Robert E. and Rynes, Eric and Humbert, Richard and Vierstra, Jeff and Maurano, Matthew T. and Haugen, Eric and Sheffield, Nathan C. and Stergachis, Andrew B. and Wang, Hao and Vernot, Benjamin and Garg, Kavita and John, Sam and Sandstrom, Richard and Bates, Daniel and Boatman, Lisa and Canfield, Theresa K. and Diegel, Morgan and Dunn, Douglas and Ebersol, Abigail K. and Frum, Tristan and Giste, Erika and Johnson, Audra K. and Johnson, Ericka M. and Kutyavin, Tanya and Lajoie, Bryan and Lee, Bum-Kyu and Lee, Kristen and London, Darin and Lotakis, Dimitra and Neph, Shane and Neri, Fidencio and Nguyen, Eric D. and Qu, Hongzhu and Reynolds, Alex P. and Roach, Vaughn and Safi, Alexias and Sanchez, Minerva E. and Sanyal, Amartya and Shafer, Anthony and Simon, Jeremy M. and Song, Lingyun and Vong, Shinny and Weaver, Molly and Yan, Yongqi and Zhang, Zhancheng and Zhang, Zhuzhu and Lenhard, Boris and Tewari, Muneesh and Dorschner, Michael O. and Hansen, R. Scott and Navas, Patrick A. and Stamatoyannopoulos, George and Iyer, Vishwanath R. and Lieb, Jason D. and Sunyaev, Shamil R. and Akey, Joshua M. and Sabo, Peter J. and Kaul, Rajinder and Furey, Terrence S. and Dekker, Job and Crawford, Gregory E. and Stamatoyannopoulos, John A.}, month = sep, year = {2012}, keywords = {evolution, genetics, genomics, molecular biology}, pages = {75--82}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FXGKAKFH/Thurman et al. 
- 2012 - The accessible chromatin landscape of the human ge.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/NIVF6S5G/nature11232.html:text/html} } @article{nair_probabilistic_2014, title = {Probabilistic partitioning methods to find significant patterns in {ChIP}-{Seq} data}, volume = {30}, issn = {1367-4803, 1460-2059}, url = {http://bioinformatics.oxfordjournals.org/content/30/17/2406}, doi = {10.1093/bioinformatics/btu318}, abstract = {Motivation: We have witnessed an enormous increase in ChIP-Seq data for histone modifications in the past few years. Discovering significant patterns in these data is an important problem for understanding biological mechanisms. Results: We propose probabilistic partitioning methods to discover significant patterns in ChIP-Seq data. Our methods take into account signal magnitude, shape, strand orientation and shifts. We compare our methods with some current methods and demonstrate significant improvements, especially with sparse data. Besides pattern discovery and classification, probabilistic partitioning can serve other purposes in ChIP-Seq data analysis. Specifically, we exemplify its merits in the context of peak finding and partitioning of nucleosome positioning patterns in human promoters. Availability and implementation: The software and code are available in the supplementary material. Contact: Philipp.Bucher@isb-sib.ch Supplementary information: Supplementary data are available at Bioinformatics online.}, language = {en}, number = {17}, urldate = {2015-07-30}, journal = {Bioinformatics}, author = {Nair, Nishanth Ulhas and Kumar, Sunil and Moret, Bernard M. E. and Bucher, Philipp}, month = sep, year = {2014}, pmid = {24812341}, pages = {2406--2413}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FEN4FCC6/Nair et al. 
- 2014 - Probabilistic partitioning methods to find signifi.pdf:application/pdf;nair2014_suppl.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FEN4FCC6/nair2014_suppl.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8KVRCEVG/2406.html:text/html} } @article{clifford_comparison_2011, title = {Comparison of {Clustering} {Methods} for {Investigation} of {Genome}-{Wide} {Methylation} {Array} {Data}}, volume = {2}, issn = {1664-8021}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3268382/}, doi = {10.3389/fgene.2011.00088}, abstract = {The use of genome-wide methylation arrays has proved very informative to investigate both clinical and biological questions in human epigenomics. The use of clustering methods either for exploration of these data or to compare to an a priori grouping, e.g., normal versus disease allows assessment of groupings of data without user bias. However no consensus on the methods to use for clustering of methylation array approaches has been reached. To determine the most appropriate clustering method for analysis of illumina array methylation data, a collection of data sets was simulated and used to compare clustering methods. Both hierarchical clustering and non-hierarchical clustering methods (k-means, k-medoids, and fuzzy clustering algorithms) were compared using a range of distance and linkage methods. As no single method consistently outperformed others across different simulations, we propose a method to capture the best clustering outcome based on an additional measure, the silhouette width. 
This approach produced a consistently higher cluster accuracy compared to using any one method in isolation.}, urldate = {2015-07-31}, journal = {Frontiers in Genetics}, author = {Clifford, Harry and Wessely, Frank and Pendurthi, Satish and Emes, Richard D.}, month = dec, year = {2011}, pmid = {22303382}, pmcid = {PMC3268382}, file = {PubMed Central Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/AECCZPQ2/Clifford et al. - 2011 - Comparison of Clustering Methods for Investigation.pdf:application/pdf} } @article{melton_recurrent_2015, title = {Recurrent somatic mutations in regulatory regions of human cancer genomes}, volume = {47}, copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1061-4036}, url = {http://www.nature.com/ng/journal/v47/n7/full/ng.3332.html}, doi = {10.1038/ng.3332}, abstract = {Aberrant regulation of gene expression in cancer can promote survival and proliferation of cancer cells. Here we integrate whole-genome sequencing data from The Cancer Genome Atlas (TCGA) for 436 patients from 8 cancer subtypes with ENCODE and other regulatory annotations to identify point mutations in regulatory regions. We find evidence for positive selection of mutations in transcription factor binding sites, consistent with these sites regulating important cancer cell functions. Using a new method that adjusts for sample- and genomic locus–specific mutation rates, we identify recurrently mutated sites across individuals with cancer. Mutated regulatory sites include known sites in the TERT promoter and many new sites, including a subset in proximity to cancer-related genes. In reporter assays, two new sites display decreased enhancer activity upon mutation. 
These data demonstrate that many regulatory regions contain mutations under selective pressure and suggest a greater role for regulatory mutations in cancer than previously appreciated.}, language = {en}, number = {7}, urldate = {2015-08-25}, journal = {Nature Genetics}, author = {Melton, Collin and Reuter, Jason A. and Spacek, Damek V. and Snyder, Michael}, month = jul, year = {2015}, pages = {710--716}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6Z4KJ5SK/Melton et al. - 2015 - Recurrent somatic mutations in regulatory regions .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3SIKTKPH/ng.3332.html:text/html} } @article{kundaje_ubiquitous_2012, title = {Ubiquitous heterogeneity and asymmetry of the chromatin environment at regulatory elements}, volume = {22}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/22/9/1735}, doi = {10.1101/gr.136366.111}, abstract = {Gene regulation at functional elements (e.g., enhancers, promoters, insulators) is governed by an interplay of nucleosome remodeling, histone modifications, and transcription factor binding. To enhance our understanding of gene regulation, the ENCODE Consortium has generated a wealth of ChIP-seq data on DNA-binding proteins and histone modifications. We additionally generated nucleosome positioning data on two cell lines, K562 and GM12878, by MNase digestion and high-depth sequencing. Here we relate 14 chromatin signals (12 histone marks, DNase, and nucleosome positioning) to the binding sites of 119 DNA-binding proteins across a large number of cell lines. We developed a new method for unsupervised pattern discovery, the Clustered AGgregation Tool (CAGT), which accounts for the inherent heterogeneity in signal magnitude, shape, and implicit strand orientation of chromatin marks. 
We applied CAGT on a total of 5084 data set pairs to obtain an exhaustive catalog of high-resolution patterns of histone modifications and nucleosome positioning signals around bound transcription factors. Our analyses reveal extensive heterogeneity in how histone modifications are deposited, and how nucleosomes are positioned around binding sites. With the exception of the CTCF/cohesin complex, asymmetry of nucleosome positioning is predominant. Asymmetry of histone modifications is also widespread, for all types of chromatin marks examined, including promoter, enhancer, elongation, and repressive marks. The fine-resolution signal shapes discovered by CAGT unveiled novel correlation patterns between chromatin marks, nucleosome positioning, and sequence content. Meta-analyses of the signal profiles revealed a common vocabulary of chromatin signals shared across multiple cell lines and binding proteins.}, language = {en}, number = {9}, urldate = {2015-09-07}, journal = {Genome Research}, author = {Kundaje, Anshul and Kyriazopoulou-Panagiotopoulou, Sofia and Libbrecht, Max and Smith, Cheryl L. and Raha, Debasish and Winters, Elliott E. and Johnson, Steven M. and Snyder, Michael and Batzoglou, Serafim and Sidow, Arend}, month = sep, year = {2012}, pmid = {22955985}, pages = {1735--1747}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6JNAUUTN/Kundaje et al. - 2012 - Ubiquitous heterogeneity and asymmetry of the chro.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RRNDKW8M/1735.html:text/html} } @article{gaffney_controls_2012, title = {Controls of {Nucleosome} {Positioning} in the {Human} {Genome}}, volume = {8}, url = {http://dx.doi.org/10.1371/journal.pgen.1003036}, doi = {10.1371/journal.pgen.1003036}, abstract = {Author Summary: Within the nucleus of the cell, the genome of eukaryotic organisms is tightly packaged into chromatin. 
Chromatin is composed of a repeating series of bead-like nucleosomes, each of which is encircled 1.7 times by a string of DNA. The organization of nucleosomes on the genome is fundamentally important because they can prevent other proteins from accessing the DNA. Previous studies of human nucleosomes concluded that most nucleosomes have fuzzy positioning and tend to occupy different locations in different cells. This interpretation, however, may be a consequence of the low resolution of existing data. Here we revisit the question of nucleosome positioning by generating the most precise map of nucleosome positions that has ever been created for a human cell line. We find that 8.7\% of nucleosomes have very consistent positioning, and most nucleosomes are more consistently positioned than expected by chance. Additionally, we estimate that almost half of the genome contains regularly spaced arrays of nucleosomes. Much of this positioning is due to the intrinsic preference of nucleosomes for some DNA sequences over others; but in some regions of the genome, the sequence preferences of nucleosomes are overridden by proteins that out-compete them for binding or displace them using energy from ATP.}, number = {11}, urldate = {2015-09-07}, journal = {PLoS Genet}, author = {Gaffney, Daniel J. and McVicker, Graham and Pai, Athma A. and Fondufe-Mittendorf, Yvonne N. and Lewellen, Noah and Michelini, Katelyn and Widom, Jonathan and Gilad, Yoav and Pritchard, Jonathan K.}, month = nov, year = {2012}, pages = {e1003036}, file = {PLoS Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TICE4R38/Gaffney et al. - 2012 - Controls of Nucleosome Positioning in the Human Ge.pdf:application/pdf} } @article{consortium_integrated_2012, title = {An integrated encyclopedia of {DNA} elements in the human genome}, volume = {489}, copyright = {© 2012 Nature Publishing Group, a division of Macmillan Publishers Limited. 
All Rights Reserved.}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v489/n7414/full/nature11247.html}, doi = {10.1038/nature11247}, abstract = {The human genome encodes the blueprint of life, but the function of the vast majority of its nearly three billion bases is unknown. The Encyclopedia of DNA Elements (ENCODE) project has systematically mapped regions of transcription, transcription factor association, chromatin structure and histone modification. These data enabled us to assign biochemical functions for 80\% of the genome, in particular outside of the well-studied protein-coding regions. Many discovered candidate regulatory elements are physically associated with one another and with expressed genes, providing new insights into the mechanisms of gene regulation. The newly identified elements also show a statistical correspondence to sequence variants linked to human disease, and can thereby guide interpretation of this variation. Overall, the project provides new insights into the organization and regulation of our genes and genome, and is an expansive resource of functional annotations for biomedical research.}, language = {en}, number = {7414}, urldate = {2015-09-28}, journal = {Nature}, author = {{The ENCODE Project Consortium}}, month = sep, year = {2012}, keywords = {genetics, genomics, molecular biology}, pages = {57--74}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XTHEAZAC/Consortium - 2012 - An integrated encyclopedia of DNA elements in the .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BQ8BWPQ4/nature11247.html:text/html;supplemental.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XTHEAZAC/supplemental.pdf:application/pdf} } @article{zhou_charting_2011, title = {Charting histone modifications and the functional organization of mammalian genomes}, volume = {12}, copyright = {© 2010 Nature Publishing Group}, issn = {1471-0056}, url = 
{http://www.nature.com/nrg/journal/v12/n1/full/nrg2905.html}, doi = {10.1038/nrg2905}, abstract = {A succession of technological advances over the past decade have enabled researchers to chart maps of histone modifications and related chromatin structures with increasing accuracy, comprehensiveness and throughput. The resulting data sets highlight the interplay between chromatin and genome function, dynamic variations in chromatin structure across cellular conditions, and emerging roles for large-scale domains and higher-ordered chromatin organization. Here we review a selection of recent studies that have probed histone modifications and successive layers of chromatin structure in mammalian genomes, the patterns that have been identified and future directions for research.}, language = {en}, number = {1}, urldate = {2015-11-04}, journal = {Nature Reviews Genetics}, author = {Zhou, Vicky W. and Goren, Alon and Bernstein, Bradley E.}, month = jan, year = {2011}, pages = {7--18}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XTXCIXEJ/Zhou et al. - 2011 - Charting histone modifications and the functional .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/G9JNJPI3/nrg2905.html:text/html} } @article{langst_chromatin_2015, title = {Chromatin {Remodelers}: {From} {Function} to {Dysfunction}}, volume = {6}, copyright = {http://creativecommons.org/licenses/by/3.0/}, shorttitle = {Chromatin {Remodelers}}, url = {http://www.mdpi.com/2073-4425/6/2/299}, doi = {10.3390/genes6020299}, abstract = {Chromatin remodelers are key players in the regulation of chromatin accessibility and nucleosome positioning on the eukaryotic DNA, thereby essential for all DNA dependent biological processes. Thus, it is not surprising that upon of deregulation of those molecular machines healthy cells can turn into cancerous cells. 
Even though the remodeling enzymes are very abundant and a multitude of different enzymes and chromatin remodeling complexes exist in the cell, the particular remodeling complex with its specific nucleosome positioning features must be at the right place at the right time in order to ensure the proper regulation of the DNA dependent processes. To achieve this, chromatin remodeling complexes harbor protein domains that specifically read chromatin targeting signals, such as histone modifications, DNA sequence/structure, non-coding RNAs, histone variants or DNA bound interacting proteins. Recent studies reveal the interaction between non-coding RNAs and chromatin remodeling complexes showing importance of RNA in remodeling enzyme targeting, scaffolding and regulation. In this review, we summarize current understanding of chromatin remodeling enzyme targeting to chromatin and their role in cancer development.}, language = {en}, number = {2}, urldate = {2015-09-28}, journal = {Genes}, author = {Längst, Gernot and Manelyte, Laura}, month = jun, year = {2015}, keywords = {chromatin remodeler, search mechanism, arrest model, non-coding RNA, cancer}, pages = {299--324}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/P7N7NXIN/Längst et Manelyte - 2015 - Chromatin Remodelers From Function to Dysfunction.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KR92U7ET/htm.html:text/html} } @article{bailey_znf143_2015, title = {{ZNF}143 provides sequence specificity to secure chromatin interactions at gene promoters}, volume = {2}, copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, url = {http://www.nature.com/ncomms/2015/150203/ncomms7186/full/ncomms7186.html}, doi = {10.1038/ncomms7186}, abstract = {Chromatin interactions connect distal regulatory elements to target gene promoters guiding stimulus- and lineage-specific transcription. 
Few factors securing chromatin interactions have so far been identified. Here, by integrating chromatin interaction maps with the large collection of transcription factor-binding profiles provided by the ENCODE project, we demonstrate that the zinc-finger protein ZNF143 preferentially occupies anchors of chromatin interactions connecting promoters with distal regulatory elements. It binds directly to promoters and associates with lineage-specific chromatin interactions and gene expression. Silencing ZNF143 or modulating its DNA-binding affinity using single-nucleotide polymorphisms (SNPs) as a surrogate of site-directed mutagenesis reveals the sequence dependency of chromatin interactions at gene promoters. We also find that chromatin interactions alone do not regulate gene expression. Together, our results identify ZNF143 as a novel chromatin-looping factor that contributes to the architectural foundation of the genome by providing sequence specificity at promoters connected with distal regulatory elements.}, language = {en}, urldate = {2015-11-17}, journal = {Nature Communications}, author = {Bailey, Swneke D. and Zhang, Xiaoyang and Desai, Kinjal and Aid, Malika and Corradin, Olivia and Cowper-Sal·lari, Richard and Akhtar-Zaidi, Batool and Scacheri, Peter C. and Haibe-Kains, Benjamin and Lupien, Mathieu}, month = feb, year = {2015}, keywords = {Biological sciences, genetics, molecular biology}, pages = {6186}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/T3KAUSWX/Bailey et al. 
- 2015 - ZNF143 provides sequence specificity to secure chr.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Z2XI58RG/ncomms7186.html:text/html} } @article{henikoff_histone_2015, title = {Histone {Variants} and {Epigenetics}}, volume = {7}, issn = {1943-0264}, url = {http://cshperspectives.cshlp.org/content/7/1/a019364}, doi = {10.1101/cshperspect.a019364}, abstract = {Histones package and compact DNA by assembling into nucleosome core particles. Most histones are synthesized at S phase for rapid deposition behind replication forks. In addition, the replacement of histones deposited during S phase by variants that can be deposited independently of replication provide the most fundamental level of chromatin differentiation. Alternative mechanisms for depositing different variants can potentially establish and maintain epigenetic states. Variants have also evolved crucial roles in chromosome segregation, transcriptional regulation, DNA repair, and other processes. Investigations into the evolution, structure, and metabolism of histone variants provide a foundation for understanding the participation of chromatin in important cellular processes and in epigenetic memory.}, language = {en}, number = {1}, urldate = {2015-09-28}, journal = {Cold Spring Harbor Perspectives in Biology}, author = {Henikoff, Steven and Smith, M. 
Mitchell}, month = jan, year = {2015}, pmid = {25561719}, pages = {a019364}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/E6UPDJKI/Henikoff et Smith - 2015 - Histone Variants and Epigenetics.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/E67U5U59/a019364.html:text/html} } @article{barth_fast_2010, title = {Fast signals and slow marks: the dynamics of histone modifications}, volume = {35}, issn = {0968-0004}, shorttitle = {Fast signals and slow marks}, url = {http://www.sciencedirect.com/science/article/pii/S0968000410000940}, doi = {10.1016/j.tibs.2010.05.006}, abstract = {Most multi-cellular organisms adopt a specific gene expression pattern during cellular differentiation. Once established, this pattern is frequently maintained over several cell divisions despite the fact that the initiating signal is no longer present. Differential packaging into chromatin is one such mechanism that allows fixation of transcriptional activity. Recent genome-wide studies demonstrate that actively transcribed regions are characterized by a specific modification pattern of histones, the main protein component of chromatin. These findings support the hypothesis that a histone code uses histone post-translational modifications to stably inscribe particular chromatin structures into the genome. Experiments on the dynamics of histone modifications reveal a striking kinetic difference between methylation, phosphorylation and acetylation, suggesting different roles of these modifications in epigenetically fixing specific gene expression patterns.}, number = {11}, urldate = {2015-09-28}, journal = {Trends in Biochemical Sciences}, author = {Barth, Teresa K. 
and Imhof, Axel}, month = nov, year = {2010}, pages = {618--626}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UPFQHFWS/Barth et Imhof - 2010 - Fast signals and slow marks the dynamics of histo.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ACW228QE/S0968000410000940.html:text/html} } @article{venkatesh_histone_2015, title = {Histone exchange, chromatin structure and the regulation of transcription}, volume = {16}, copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1471-0072}, url = {http://www.nature.com/nrm/journal/v16/n3/full/nrm3941.html}, doi = {10.1038/nrm3941}, abstract = {The packaging of DNA into strings of nucleosomes is one of the features that allows eukaryotic cells to tightly regulate gene expression. The ordered disassembly of nucleosomes permits RNA polymerase II (Pol II) to access the DNA, whereas nucleosomal reassembly impedes access, thus preventing transcription and mRNA synthesis. Chromatin modifications, chromatin remodellers, histone chaperones and histone variants regulate nucleosomal dynamics during transcription. Disregulation of nucleosome dynamics results in aberrant transcription initiation, producing non-coding RNAs. 
Ongoing research is elucidating the molecular mechanisms that regulate chromatin structure during transcription by preventing histone exchange, thereby limiting non-coding RNA expression.}, language = {en}, number = {3}, urldate = {2015-09-28}, journal = {Nature Reviews Molecular Cell Biology}, author = {Venkatesh, Swaminathan and Workman, Jerry L.}, month = mar, year = {2015}, pages = {178--189}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/DSVWNBZV/Venkatesh et Workman - 2015 - Histone exchange, chromatin structure and the regu.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/W2JBHNFP/nrm3941.html:text/html} } @article{tsompana_chromatin_2014, title = {Chromatin accessibility: a window into the genome}, volume = {7}, issn = {1756-8935}, shorttitle = {Chromatin accessibility}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4253006/}, doi = {10.1186/1756-8935-7-33}, abstract = {Transcriptional activation throughout the eukaryotic lineage has been tightly linked with disruption of nucleosome organization at promoters, enhancers, silencers, insulators and locus control regions due to transcription factor binding. Regulatory DNA thus coincides with open or accessible genomic sites of remodeled chromatin. Current chromatin accessibility assays are used to separate the genome by enzymatic or chemical means and isolate either the accessible or protected locations. The isolated DNA is then quantified using a next-generation sequencing platform. Wide application of these assays has recently focused on the identification of the instrumental epigenetic changes responsible for differential gene expression, cell proliferation, functional diversification and disease development. Here we discuss the limitations and advantages of current genome-wide chromatin accessibility assays with especial attention on experimental precautions and sequence data analysis. 
We conclude with our perspective on future improvements necessary for moving the field of chromatin profiling forward.}, urldate = {2015-10-01}, journal = {Epigenetics \& Chromatin}, author = {Tsompana, Maria and Buck, Michael J}, month = nov, year = {2014}, pmid = {25473421}, pmcid = {PMC4253006}, file = {PubMed Central Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/X5EEV2QU/Tsompana et Buck - 2014 - Chromatin accessibility a window into the genome.pdf:application/pdf} } @article{iwafuchi-doi_pioneer_2014, title = {Pioneer transcription factors in cell reprogramming}, volume = {28}, issn = {0890-9369, 1549-5477}, url = {http://genesdev.cshlp.org/content/28/24/2679}, doi = {10.1101/gad.253443.114}, abstract = {A subset of eukaryotic transcription factors possesses the remarkable ability to reprogram one type of cell into another. The transcription factors that reprogram cell fate are invariably those that are crucial for the initial cell programming in embryonic development. To elicit cell programming or reprogramming, transcription factors must be able to engage genes that are developmentally silenced and inappropriate for expression in the original cell. Developmentally silenced genes are typically embedded in “closed” chromatin that is covered by nucleosomes and not hypersensitive to nuclease probes such as DNase I. Biochemical and genomic studies have shown that transcription factors with the highest reprogramming activity often have the special ability to engage their target sites on nucleosomal DNA, thus behaving as “pioneer factors” to initiate events in closed chromatin. Other reprogramming factors appear dependent on pioneer factors for engaging nucleosomes and closed chromatin. However, certain genomic domains in which nucleosomes are occluded by higher-order chromatin structures, such as in heterochromatin, are resistant to pioneer factor binding. 
Understanding the means by which pioneer factors can engage closed chromatin and how heterochromatin can prevent such binding promises to advance our ability to reprogram cell fates at will and is the topic of this review.}, language = {en}, number = {24}, urldate = {2015-10-01}, journal = {Genes \& Development}, author = {Iwafuchi-Doi, Makiko and Zaret, Kenneth S.}, month = dec, year = {2014}, pmid = {25512556}, keywords = {Chromatin, pioneer transcription factor, transdifferentiation, Reprogramming, development, nucleosome}, pages = {2679--2692}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/CEITEC7S/Iwafuchi-Doi et Zaret - 2014 - Pioneer transcription factors in cell reprogrammin.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KB29H8TM/2679.html:text/html} } @article{cairns_logic_2009, title = {The logic of chromatin architecture and remodelling at promoters}, volume = {461}, copyright = {© 2009 Nature Publishing Group}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v461/n7261/full/nature08450.html}, doi = {10.1038/nature08450}, abstract = {The regulation of gene transcription involves a dynamic balance between packaging regulatory sequences into chromatin and allowing transcriptional regulators access to these sequences. Access is restricted by the nucleosomes, but these can be repositioned or ejected by enzymes known as nucleosome remodellers. In addition, the DNA sequence can impart stiffness or curvature to the DNA, thereby affecting the position of nucleosomes on the DNA, influencing particular promoter 'architectures'. 
Recent genome-wide studies in yeast suggest that constitutive and regulated genes have architectures that differ in terms of nucleosome position, turnover, remodelling requirements and transcriptional noise.}, language = {en}, number = {7261}, urldate = {2015-10-02}, journal = {Nature}, author = {Cairns, Bradley R.}, month = sep, year = {2009}, pages = {193--198}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XWRUTA2D/Cairns - 2009 - The logic of chromatin architecture and remodellin.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/CPRFW37U/nature08450.html:text/html} } @article{the_uk10k_consortium_uk10k_2015, title = {The {UK}10K project identifies rare variants in health and disease}, volume = {526}, copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v526/n7571/full/nature14962.html}, doi = {10.1038/nature14962}, abstract = {The contribution of rare and low-frequency variants to human traits is largely unexplored. Here we describe insights from sequencing whole genomes (low read depth, 7×) or exomes (high read depth, 80×) of nearly 10,000 individuals from population-based and disease collections. In extensively phenotyped cohorts we characterize over 24 million novel sequence variants, generate a highly accurate imputation reference panel and identify novel alleles associated with levels of triglycerides (APOB), adiponectin (ADIPOQ) and low-density lipoprotein cholesterol (LDLR and RGAG1) from single-marker and rare variant aggregation tests. We describe population structure and functional annotation of rare and low-frequency variants, use the data to estimate the benefits of sequencing for association studies, and summarize lessons from disease-specific collections. 
Finally, we make available an extensive resource, including individual-level genetic and phenotypic data and web-based tools to facilitate the exploration of association results.}, language = {en}, number = {7571}, urldate = {2015-10-09}, journal = {Nature}, author = {{The UK10K Consortium}}, month = oct, year = {2015}, keywords = {Next-generation sequencing, Genome-wide association studies}, pages = {82--90}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/CUKIZJGS/The UK10K Consortium - 2015 - The UK10K project identifies rare variants in heal.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HZEXSHCX/nature14962.html:text/html} } @article{locke_coordinated_2015, title = {Coordinated epigenetic remodelling of transcriptional networks occurs during early breast carcinogenesis}, volume = {7}, copyright = {2015 Locke et al.; licensee BioMed Central.}, issn = {1868-7083}, url = {http://www.clinicalepigeneticsjournal.com/content/7/1/52/abstract}, doi = {10.1186/s13148-015-0086-0}, abstract = {Dysregulation of the epigenome is a common event in malignancy; however, deciphering the earliest cancer-associated epigenetic events remains a challenge. Cancer epigenome studies to date have primarily utilised cancer cell lines or clinical samples, where it is difficult to identify the initial epigenetic lesions from those that occur over time. Here, we analysed the epigenome of human mammary epithelial cells (HMEC) and a matched variant cell population (vHMEC) that have spontaneously escaped senescence and undergone partial carcinogenic transformation. Using this model of basal-like breast carcinogenesis, we provide striking new insights into the very first epigenetic changes that occur during the initial stages of malignancy.}, language = {en}, number = {1}, urldate = {2015-10-19}, journal = {Clinical Epigenetics}, author = {Locke, Warwick J. and Zotenko, Elena and Stirzaker, Clare and Robinson, Mark D. 
and Hinshelwood, Rebecca A. and Stone, Andrew and Reddel, Roger R. and Huschtscha, Lily I. and Clark, Susan J.}, month = may, year = {2015}, pmid = {25960784}, keywords = {DNA Methylation, Methylome, Basal breast cancer, Epigenome sequencing, Biomarker, epigenetics}, pages = {52}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7R7V74EJ/Locke et al. - 2015 - Coordinated epigenetic remodelling of transcriptio.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/92XAVETV/52.html:text/html} } @article{su_somatic_2015, title = {Somatic {Cell} {Fusions} {Reveal} {Extensive} {Heterogeneity} in {Basal}-like {Breast} {Cancer}}, volume = {11}, issn = {2211-1247}, url = {http://www.sciencedirect.com/science/article/pii/S2211124715005239}, doi = {10.1016/j.celrep.2015.05.011}, abstract = {Summary Basal-like and luminal breast tumors have distinct clinical behavior and molecular profiles, yet the underlying mechanisms are poorly defined. To interrogate processes that determine these distinct phenotypes and their inheritance pattern, we generated somatic cell fusions and performed integrated genetic and epigenetic (DNA methylation and chromatin) profiling. We found that the basal-like trait is generally dominant and is largely defined by epigenetic repression of luminal transcription factors. Definition of super-enhancers highlighted a core program common in luminal cells but a high degree of heterogeneity in basal-like breast cancers that correlates with clinical outcome. We also found that protein extracts of basal-like cells are sufficient to induce a luminal-to-basal phenotypic switch, implying a trigger of basal-like autoregulatory circuits. We determined that KDM6A might be required for luminal-basal fusions, and we identified EN1, TBX18, and TCF4 as candidate transcriptional regulators of the luminal-to-basal switch. 
Our findings highlight the remarkable epigenetic plasticity of breast cancer cells.}, number = {10}, urldate = {2015-11-02}, journal = {Cell Reports}, author = {Su, Ying and Subedee, Ashim and Bloushtain-Qimron, Noga and Savova, Virginia and Krzystanek, Marcin and Li, Lewyn and Marusyk, Andriy and Tabassum, Doris P. and Zak, Alexander and Flacker, Mary Jo and Li, Mei and Lin, Jessica J. and Sukumar, Saraswati and Suzuki, Hiromu and Long, Henry and Szallasi, Zoltan and Gimelbrant, Alexander and Maruyama, Reo and Polyak, Kornelia}, month = jun, year = {2015}, pages = {1549--1563}, file = {ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4MNPHIBW/S2211124715005239.html:text/html} } @article{gascard_epigenetic_2015, title = {Epigenetic and transcriptional determinants of the human breast}, volume = {6}, copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, url = {http://www.nature.com/ncomms/2015/150218/ncomms7351/full/ncomms7351.html}, doi = {10.1038/ncomms7351}, abstract = {While significant effort has been dedicated to the characterization of epigenetic changes associated with prenatal differentiation, relatively little is known about the epigenetic changes that accompany post-natal differentiation where fully functional differentiated cell types with limited lifespans arise. Here we sought to address this gap by generating epigenomic and transcriptional profiles from primary human breast cell types isolated from disease-free human subjects. From these data we define a comprehensive human breast transcriptional network, including a set of myoepithelial- and luminal epithelial-specific intronic retention events. Intersection of epigenetic states with RNA expression from distinct breast epithelium lineages demonstrates that mCpG provides a stable record of exonic and intronic usage, whereas H3K36me3 is dynamic. 
We find a striking asymmetry in epigenomic reprogramming between luminal and myoepithelial cell types, with the genomes of luminal cells harbouring more than twice the number of hypomethylated enhancer elements compared with myoepithelial cells.}, language = {en}, urldate = {2015-11-18}, journal = {Nature Communications}, author = {Gascard, Philippe and Bilenky, Misha and Sigaroudinia, Mahvash and Zhao, Jianxin and Li, Luolan and Carles, Annaick and Delaney, Allen and Tam, Angela and Kamoh, Baljit and Cho, Stephanie and Griffith, Malachi and Chu, Andy and Robertson, Gordon and Cheung, Dorothy and Li, Irene and Heravi-Moussavi, Alireza and Moksa, Michelle and Mingay, Matthew and Hussainkhel, Angela and Davis, Brad and Nagarajan, Raman P. and Hong, Chibo and Echipare, Lorigail and O’Geen, Henriette and Hangauer, Matthew J. and Cheng, Jeffrey B. and Neel, Dana and Hu, Donglei and McManus, Michael T. and Moore, Richard and Mungall, Andrew and Ma, Yussanne and Plettner, Patrick and Ziv, Elad and Wang, Ting and Farnham, Peggy J. and Jones, Steven J. M. and Marra, Marco A. and Tlsty, Thea D. and Costello, Joseph F. and Hirst, Martin}, month = feb, year = {2015}, keywords = {Biological sciences, developmental biology, molecular biology}, pages = {6351}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9KJ75WN5/Gascard et al. 
- 2015 - Epigenetic and transcriptional determinants of the.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/766Q6FZB/ncomms7351.html:text/html} } @article{soufi_pioneer_2015, title = {Pioneer {Transcription} {Factors} {Target} {Partial} {DNA} {Motifs} on {Nucleosomes} to {Initiate} {Reprogramming}}, volume = {161}, issn = {0092-8674}, url = {http://www.cell.com/article/S0092867415003049/abstract}, doi = {10.1016/j.cell.2015.03.017}, abstract = {Pioneer transcription factors (TFs) access silent chromatin and initiate cell-fate changes, using diverse types of DNA binding domains (DBDs). FoxA, the paradigm pioneer TF, has a winged helix DBD that resembles linker histone and thereby binds its target sites on nucleosomes and in compacted chromatin. Herein, we compare the nucleosome and chromatin targeting activities of Oct4 (POU DBD), Sox2 (HMG box DBD), Klf4 (zinc finger DBD), and c-Myc (bHLH DBD), which together reprogram somatic cells to pluripotency. Purified Oct4, Sox2, and Klf4 proteins can bind nucleosomes in vitro, and in vivo they preferentially target silent sites enriched for nucleosomes. Pioneer activity relates simply to the ability of a given DBD to target partial motifs displayed on the nucleosome surface. Such partial motif recognition can occur by coordinate binding between factors. Our findings provide insight into how pioneer factors can target naive chromatin sites.}, language = {English}, number = {3}, urldate = {2015-12-02}, journal = {Cell}, author = {Soufi, Abdenour and Garcia, Meilin Fernandez and Jaroszewicz, Artur and Osman, Nebiyu and Pellegrini, Matteo and Zaret, Kenneth S.}, month = apr, year = {2015}, pmid = {25892221}, pages = {555--568}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6KNK7QSD/Soufi et al. 
- 2015 - Pioneer Transcription Factors Target Partial DNA M.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5GT7KEX7/S0092-8674(15)00304-9.html:text/html} } @article{weber_nucleosomes_2014, title = {Nucleosomes {Are} {Context}-{Specific}, {H}2A.{Z}-{Modulated} {Barriers} to {RNA} {Polymerase}}, volume = {53}, issn = {1097-2765}, url = {http://www.cell.com/article/S1097276514001592/abstract}, doi = {10.1016/j.molcel.2014.02.014}, abstract = {Nucleosomes are barriers to transcription in vitro; however, their effects on RNA polymerase in vivo are unknown. Here we describe a simple and general strategy to comprehensively map the positions of elongating and arrested RNA polymerase II (RNAPII) at nucleotide resolution. We find that the entry site of the first (+1) nucleosome is a barrier to RNAPII for essentially all genes, including those undergoing regulated pausing farther upstream. In contrast to the +1 nucleosome, gene body nucleosomes are low barriers and cause RNAPII stalling both at the entry site and near the dyad axis. The extent of the +1 nucleosome barrier correlates with nucleosome occupancy but anticorrelates with enrichment of histone variant H2A.Z. Importantly, depletion of H2A.Z from a nucleosome position results in a higher barrier to RNAPII. Our results suggest that nucleosomes present significant, context-specific barriers to RNAPII in vivo that can be tuned by the incorporation of H2A.Z.}, language = {English}, number = {5}, urldate = {2016-01-07}, journal = {Molecular Cell}, author = {Weber, Christopher M. and Ramachandran, Srinivas and Henikoff, Steven}, month = jun, year = {2014}, pmid = {24606920}, pages = {819--830}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3MMPKGM5/Weber et al. 
- 2014 - Nucleosomes Are Context-Specific, H2A.Z-Modulated .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/2VESJ49J/S1097-2765(14)00159-2.html:text/html} } @article{li_combining_2015, series = {({Epi}){Genomics} approaches and their applications}, title = {Combining {MeDIP}-seq and {MRE}-seq to investigate genome-wide {CpG} methylation}, volume = {72}, issn = {1046-2023}, url = {http://www.sciencedirect.com/science/article/pii/S1046202314003594}, doi = {10.1016/j.ymeth.2014.10.032}, abstract = {DNA CpG methylation is a widespread epigenetic mark in high eukaryotes including mammals. DNA methylation plays key roles in diverse biological processes such as X chromosome inactivation, transposable element repression, genomic imprinting, and control of gene expression. Recent advancements in sequencing-based DNA methylation profiling methods provide an unprecedented opportunity to measure DNA methylation in a genome-wide fashion, making it possible to comprehensively investigate the role of DNA methylation. Several methods have been developed, such as Whole Genome Bisulfite Sequencing (WGBS), Reduced Representation Bisulfite Sequencing (RRBS), and enrichment-based methods including Methylation Dependent ImmunoPrecipitation followed by sequencing (MeDIP-seq), methyl-CpG binding domain (MBD) protein-enriched genome sequencing (MBD-seq), methyltransferase-directed Transfer of Activated Groups followed by sequencing (mTAG), and Methylation-sensitive Restriction Enzyme digestion followed by sequencing (MRE-seq). These methods differ by their genomic CpG coverage, resolution, quantitative accuracy, cost, and software for analyzing the data. Among these, WGBS is considered the gold standard. However, it is still a cost-prohibitive technology for a typical laboratory due to the required sequencing depth. 
We found that by integrating two enrichment-based methods that are complementary in nature (i.e., MeDIP-seq and MRE-seq), we can significantly increase the efficiency of whole DNA methylome profiling. By using two recently developed computational algorithms (i.e., M\&M and methylCRF), the combination of MeDIP-seq and MRE-seq produces genome-wide CpG methylation measurement at high coverage and high resolution, and robust predictions of differentially methylated regions. Thus, the combination of the two enrichment-based methods provides a cost-effective alternative to WGBS. In this article we describe both the experimental protocols for performing MeDIP-seq and MRE-seq, and the computational protocols for running M\&M and methylCRF.}, urldate = {2015-11-20}, journal = {Methods}, author = {Li, Daofeng and Zhang, Bo and Xing, Xiaoyun and Wang, Ting}, month = jan, year = {2015}, keywords = {DNA Methylation, MeDIP-seq, MRE-seq, M\&M, methylCRF}, pages = {29--40}, file = {1-s2.0-S1046202314003594-main.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HNFTVWZG/1-s2.0-S1046202314003594-main.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HNFTVWZG/S1046202314003594.html:text/html} } @article{jolma_dna-dependent_2015, title = {{DNA}-dependent formation of transcription factor pairs alters their binding specificity}, volume = {527}, copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v527/n7578/full/nature15518.html}, doi = {10.1038/nature15518}, abstract = {Gene expression is regulated by transcription factors (TFs), proteins that recognize short DNA sequence motifs. Such sequences are very common in the human genome, and an important determinant of the specificity of gene expression is the cooperative binding of multiple TFs to closely located motifs. 
However, interactions between DNA-bound TFs have not been systematically characterized. To identify TF pairs that bind cooperatively to DNA, and to characterize their spacing and orientation preferences, we have performed consecutive affinity-purification systematic evolution of ligands by exponential enrichment (CAP-SELEX) analysis of 9,400 TF–TF–DNA interactions. This analysis revealed 315 TF–TF interactions recognizing 618 heterodimeric motifs, most of which have not been previously described. The observed cooperativity occurred promiscuously between TFs from diverse structural families. Structural analysis of the TF pairs, including a novel crystal structure of MEIS1 and DLX3 bound to their identified recognition site, revealed that the interactions between the TFs were predominantly mediated by DNA. Most TF pair sites identified involved a large overlap between individual TF recognition motifs, and resulted in recognition of composite sites that were markedly different from the individual TF’s motifs. Together, our results indicate that the DNA molecule commonly plays an active role in cooperative interactions that define the gene regulatory lexicon.}, language = {en}, number = {7578}, urldate = {2015-12-02}, journal = {Nature}, author = {Jolma, Arttu and Yin, Yimeng and Nitta, Kazuhiro R. and Dave, Kashyap and Popov, Alexander and Taipale, Minna and Enge, Martin and Kivioja, Teemu and Morgunova, Ekaterina and Taipale, Jussi}, month = nov, year = {2015}, keywords = {DNA, Gene regulation, X-ray crystallography, Transcriptional regulatory elements}, pages = {384--388}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MIJZ6935/Jolma et al. 
- 2015 - DNA-dependent formation of transcription factor pa.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/DPX4EIAA/nature15518.html:text/html} } @article{zentner_high-resolution_2014, title = {High-resolution digital profiling of the epigenome}, volume = {15}, copyright = {© 2014 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1471-0056}, url = {http://www.nature.com/nrg/journal/v15/n12/full/nrg3798.html}, doi = {10.1038/nrg3798}, abstract = {The widespread adoption of short-read DNA sequencing as a digital epigenomic readout platform has motivated the development of genome-wide tools that achieve base-pair resolution. New methods for footprinting and affinity purification of nucleosomes, RNA polymerases, chromatin remodellers and transcription factors have increased the resolution of epigenomic profiling by two orders of magnitude, leading to new insights into how the chromatin landscape affects gene regulation. These digital epigenomic tools have also been applied to directly profile both turnover kinetics and transcription in situ. In this Review, we describe how these new genome-wide tools allow interrogation of diverse aspects of the epigenome.}, language = {en}, number = {12}, urldate = {2015-12-16}, journal = {Nature Reviews Genetics}, author = {Zentner, Gabriel E. 
and Henikoff, Steven}, month = dec, year = {2014}, keywords = {Next-generation sequencing, Chromatin analysis, epigenetics, Epigenomics}, pages = {814--827}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/H4M8AG8H/Zentner et Henikoff - 2014 - High-resolution digital profiling of the epigenome.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8FX7956R/nrg3798.html:text/html} } @article{soon_high-throughput_2014, title = {High-throughput sequencing for biology and medicine}, volume = {9}, issn = {1744-4292}, url = {http://msb.embopress.org/cgi/doi/10.1038/msb.2012.61}, doi = {10.1038/msb.2012.61}, language = {en}, number = {1}, urldate = {2016-01-14}, journal = {Molecular Systems Biology}, author = {Soon, W. W. and Hariharan, M. and Snyder, M. P.}, month = jan, year = {2013}, pages = {640}, file = {Soon13.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Soon13.pdf:application/pdf} } @article{whitesides_whitesides_2004, title = {Whitesides' {Group}: {Writing} a {Paper}}, volume = {16}, issn = {1521-4095}, shorttitle = {Whitesides' {Group}}, url = {http://onlinelibrary.wiley.com/doi/10.1002/adma.200400767/abstract}, doi = {10.1002/adma.200400767}, language = {en}, number = {15}, urldate = {2016-01-14}, journal = {Advanced Materials}, author = {Whitesides, G. 
M.}, month = aug, year = {2004}, pages = {1375--1377}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/CC6CNIUE/Whitesides - 2004 - Whitesides' Group Writing a Paper.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/52MA9ZBD/abstract.html:text/html} } @article{vale_accelerating_2015, title = {Accelerating scientific publication in biology}, volume = {112}, issn = {0027-8424, 1091-6490}, url = {http://www.pnas.org/content/112/44/13439}, doi = {10.1073/pnas.1511912112}, abstract = {Scientific publications enable results and ideas to be transmitted throughout the scientific community. The number and type of journal publications also have become the primary criteria used in evaluating career advancement. Our analysis suggests that publication practices have changed considerably in the life sciences over the past 30 years. More experimental data are now required for publication, and the average time required for graduate students to publish their first paper has increased and is approaching the desirable duration of PhD training. Because publication is generally a requirement for career progression, schemes to reduce the time of graduate student and postdoctoral training may be difficult to implement without also considering new mechanisms for accelerating communication of their work. The increasing time to publication also delays potential catalytic effects that ensue when many scientists have access to new information. 
The time has come for life scientists, funding agencies, and publishers to discuss how to communicate new findings in a way that best serves the interests of the public and the scientific community.}, language = {en}, number = {44}, urldate = {2016-01-15}, journal = {Proceedings of the National Academy of Sciences}, author = {Vale, Ronald D.}, month = nov, year = {2015}, pmid = {26508643}, keywords = {scientific publication, arXiv, PhD training, career advancement, journals}, pages = {13439--13446}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/G9NU54KG/Vale - 2015 - Accelerating scientific publication in biology.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UARRVZJ4/13439.html:text/html} } @article{chen_chromatin_2014, title = {Chromatin modifiers and remodellers: regulators of cellular differentiation}, volume = {15}, copyright = {© 2013 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1471-0056}, shorttitle = {Chromatin modifiers and remodellers}, url = {http://www.nature.com/nrg/journal/v15/n2/full/nrg3607.html}, doi = {10.1038/nrg3607}, abstract = {Cellular differentiation is, by definition, epigenetic. Genome-wide profiling of pluripotent cells and differentiated cells suggests global chromatin remodelling during differentiation, which results in a progressive transition from a fairly open chromatin configuration to a more compact state. Genetic studies in mouse models show major roles for a variety of histone modifiers and chromatin remodellers in key developmental transitions, such as the segregation of embryonic and extra-embryonic lineages in blastocyst stage embryos, the formation of the three germ layers during gastrulation and the differentiation of adult stem cells. 
Furthermore, rather than merely stabilizing the gene expression changes that are driven by developmental transcription factors, there is emerging evidence that chromatin regulators have multifaceted roles in cell fate decisions.}, language = {en}, number = {2}, urldate = {2016-01-18}, journal = {Nature Reviews Genetics}, author = {Chen, Taiping and Dent, Sharon Y. R.}, month = feb, year = {2014}, keywords = {Chromatin remodelling, Differentiation, Histone post-translational modifications, Pluripotency}, pages = {93--106}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/776XAIRC/Chen et Dent - 2014 - Chromatin modifiers and remodellers regulators of.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XPB7IVZ2/nrg3607.html:text/html} } @article{voss_dynamic_2014, title = {Dynamic regulation of transcriptional states by chromatin and transcription factors}, volume = {15}, copyright = {© 2013 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1471-0056}, url = {http://www.nature.com/nrg/journal/v15/n2/full/nrg3623.html}, doi = {10.1038/nrg3623}, abstract = {The interaction of regulatory proteins with the complex nucleoprotein structures that are found in mammalian cells involves chromatin reorganization at multiple levels. Mechanisms that support these transitions are complex on many timescales, which range from milliseconds to minutes or hours. In this Review, we discuss emerging concepts regarding the function of regulatory elements in living cells. We also explore the involvement of these dynamic and stochastic processes in the evolution of fluctuating transcriptional activity states that are now commonly reported in eukaryotic systems.}, language = {en}, number = {2}, urldate = {2016-01-18}, journal = {Nature Reviews Genetics}, author = {Voss, Ty C. 
and Hager, Gordon L.}, month = feb, year = {2014}, keywords = {Gene regulation, Chromatin remodelling, Nuclear receptors}, pages = {69--81}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HHXI44C5/Voss et Hager - 2014 - Dynamic regulation of transcriptional states by ch.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/2MW9DB9V/nrg3623.html:text/html} } @article{yusuf_transcription_2012, title = {The {Transcription} {Factor} {Encyclopedia}}, volume = {13}, copyright = {2012 Yusuf et al.; licensee BioMed Central Ltd.}, issn = {1465-6906}, url = {http://genomebiology.com/2012/13/3/R24/abstract}, doi = {10.1186/gb-2012-13-3-r24}, abstract = {Here we present the Transcription Factor Encyclopedia (TFe), a new web-based compendium of mini review articles on transcription factors (TFs) that is founded on the principles of open access and collaboration. Our consortium of over 100 researchers has collectively contributed over 130 mini review articles on pertinent human, mouse and rat TFs. Notable features of the TFe website include a high-quality PDF generator and web API for programmatic data retrieval. TFe aims to rapidly educate scientists about the TFs they encounter through the delivery of succinct summaries written and vetted by experts in the field. TFe is available at http://www.cisreg.ca/tfe.}, language = {en}, number = {3}, urldate = {2016-01-19}, journal = {Genome Biology}, author = {Yusuf, Dimas and Butland, Stefanie L. and Swanson, Magdalena I. and Bolotin, Eugene and Ticoll, Amy and Cheung, Warren A. and Zhang, Xiao Y. Cindy and Dickman, Christopher TD and Fulton, Debra L. and Lim, Jonathan S. and Schnabl, Jake M. and Ramos, Oscar HP and Vasseur-Cognet, Mireille and Leeuw, Charles N. de and Simpson, Elizabeth M. and Ryffel, Gerhart U. and Lam, Eric W.-F. and Kist, Ralf and Wilson, Miranda SC and Marco-Ferreres, Raquel and Brosens, Jan J. and Beccari, Leonardo L. 
and Bovolenta, Paola and Benayoun, Bérénice A. and Monteiro, Lara J. and Schwenen, Helma DC and Grontved, Lars and Wederell, Elizabeth and Mandrup, Susanne and Veitia, Reiner A. and Chakravarthy, Harini and Hoodless, Pamela A. and Mancarelli, M. Michela and Torbett, Bruce E. and Banham, Alison H. and Reddy, Sekhar P. and Cullum, Rebecca L. and Liedtke, Michaela and Tschan, Mario P. and Vaz, Michelle and Rizzino, Angie and Zannini, Mariastella and Frietze, Seth and Farnham, Peggy J. and Eijkelenboom, Astrid and Brown, Philip J. and Laperrière, David and Leprince, Dominique and Cristofaro, Tiziana de and Prince, Kelly L. and Putker, Marrit and Peso, Luis del and Camenisch, Gieri and Wenger, Roland H. and Mikula, Michal and Rozendaal, Marieke and Mader, Sylvie and Ostrowski, Jerzy and Rhodes, Simon J. and Rechem, Capucine Van and Boulay, Gaylor and Olechnowicz, Sam WZ and Breslin, Mary B. and Lan, Michael S. and Nanan, Kyster K. and Wegner, Michael and Hou, Juan and Mullen, Rachel D. and Colvin, Stephanie C. and Noy, Peter J. and Webb, Carol F. and Witek, Matthew E. and Ferrell, Scott and Daniel, Juliet M. and Park, Jason and Waldman, Scott A. and Peet, Daniel J. and Taggart, Michael and Jayaraman, Padma-Sheela and Karrich, Julien J. and Blom, Bianca and Vesuna, Farhad and O'Geen, Henriette and Sun, Yunfu and Gronostajski, Richard M. and Woodcroft, Mark W. and Hough, Margaret R. and Chen, Edwin and Europe-Finner, G. Nicholas and Karolczak-Bayatti, Magdalena and Bailey, Jarrod and Hankinson, Oliver and Raman, Venu and LeBrun, David P. and Biswal, Shyam and Harvey, Christopher J. and DeBruyne, Jason P. and Hogenesch, John B. and Hevner, Robert F. and Héligon, Christophe and Luo, Xin M. and Blank, Marissa C. and Millen, Kathleen J. and Sharlin, David S. and Forrest, Douglas and Dahlman-Wright, Karin and Zhao, Chunyan and Mishima, Yuriko and Sinha, Satrajit and Chakrabarti, Rumela and Portales-Casamar, Elodie and Sladek, Frances M. and Bradley, Philip H. 
and Wasserman, Wyeth W.}, month = mar, year = {2012}, pmid = {22458515}, pages = {R24}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4ZWWPCW9/Yusuf et al. - 2012 - The Transcription Factor Encyclopedia.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/T826TI5M/R24.html:text/html} } @article{arvey_sequence_2012, title = {Sequence and chromatin determinants of cell-type–specific transcription factor binding}, volume = {22}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/22/9/1723}, doi = {10.1101/gr.127712.111}, abstract = {Gene regulatory programs in distinct cell types are maintained in large part through the cell-type–specific binding of transcription factors (TFs). The determinants of TF binding include direct DNA sequence preferences, DNA sequence preferences of cofactors, and the local cell-dependent chromatin context. To explore the contribution of DNA sequence signal, histone modifications, and DNase accessibility to cell-type–specific binding, we analyzed 286 ChIP-seq experiments performed by the ENCODE Consortium. This analysis included experiments for 67 transcriptional regulators, 15 of which were profiled in both the GM12878 (lymphoblastoid) and K562 (erythroleukemic) human hematopoietic cell lines. To model TF-bound regions, we trained support vector machines (SVMs) that use flexible k-mer patterns to capture DNA sequence signals more accurately than traditional motif approaches. In addition, we trained SVM spatial chromatin signatures to model local histone modifications and DNase accessibility, obtaining significantly more accurate TF occupancy predictions than simpler approaches. Consistent with previous studies, we find that DNase accessibility can explain cell-line–specific binding for many factors. 
However, we also find that of the 10 factors with prominent cell-type–specific binding patterns, four display distinct cell-type–specific DNA sequence preferences according to our models. Moreover, for two factors we identify cell-specific binding sites that are accessible in both cell types but bound only in one. For these sites, cell-type–specific sequence models, rather than DNase accessibility, are better able to explain differential binding. Our results suggest that using a single motif for each TF and filtering for chromatin accessible loci is not always sufficient to accurately account for cell-type–specific binding profiles.}, language = {en}, number = {9}, urldate = {2016-01-19}, journal = {Genome Research}, author = {Arvey, Aaron and Agius, Phaedra and Noble, William Stafford and Leslie, Christina}, month = sep, year = {2012}, pmid = {22955984}, pages = {1723--1734}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/EIBI77AP/Arvey et al. - 2012 - Sequence and chromatin determinants of cell-type–s.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/J34PZE7C/1723.html:text/html} } @article{wingender_tfclass:_2013, title = {{TFClass}: an expandable hierarchical classification of human transcription factors}, volume = {41}, issn = {0305-1048, 1362-4962}, shorttitle = {{TFClass}}, url = {http://nar.oxfordjournals.org/content/41/D1/D165}, doi = {10.1093/nar/gks1123}, abstract = {TFClass (http://tfclass.bioinf.med.uni-goettingen.de/) provides a comprehensive classification of human transcription factors based on their DNA-binding domains. Transcription factors constitute a large functional family of proteins directly regulating the activity of genes. Most of them are sequence-specific DNA-binding proteins, thus reading out the information encoded in cis-regulatory DNA elements of promoters, enhancers and other regulatory regions of a genome. 
TFClass is a database that classifies human transcription factors by a six-level classification schema, four of which are abstractions according to different criteria, while the fifth level represents TF genes and the sixth individual gene products. Altogether, nine superclasses have been identified, comprising 40 classes and 111 families. Counted by genes, 1558 human TFs have been classified so far or {\textgreater}2900 different TFs when including their isoforms generated by alternative splicing or protein processing events. With this classification, we hope to provide a basis for deciphering protein–DNA recognition codes; moreover, it can be used for constructing expanded transcriptional networks by inferring additional TF-target gene relations.}, language = {en}, number = {D1}, urldate = {2016-01-19}, journal = {Nucleic Acids Research}, author = {Wingender, Edgar and Schoeps, Torsten and Dönitz, Jürgen}, month = jan, year = {2013}, pmid = {23180794}, pages = {D165--D170}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Q3PRSC5J/Wingender et al. - 2013 - TFClass an expandable hierarchical classification.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GS9XIDFU/D165.html:text/html} } @article{spitz_transcription_2012, title = {Transcription factors: from enhancer binding to developmental control}, volume = {13}, copyright = {© 2012 Nature Publishing Group}, issn = {1471-0056}, shorttitle = {Transcription factors}, url = {http://www.nature.com/nrg/journal/v13/n9/full/nrg3207.html}, doi = {10.1038/nrg3207}, abstract = {Developmental progression is driven by specific spatiotemporal domains of gene expression, which give rise to stereotypically patterned embryos even in the presence of environmental and genetic variation. Views of how transcription factors regulate gene expression are changing owing to recent genome-wide studies of transcription factor binding and RNA expression. 
Such studies reveal patterns that, at first glance, seem to contrast with the robustness of the developmental processes they encode. Here, we review our current knowledge of transcription factor function from genomic and genetic studies and discuss how different strategies, including extensive cooperative regulation (both direct and indirect), progressive priming of regulatory elements, and the integration of activities from multiple enhancers, confer specificity and robustness to transcriptional regulation during development.}, language = {en}, number = {9}, urldate = {2016-01-19}, journal = {Nature Reviews Genetics}, author = {Spitz, François and Furlong, Eileen E. M.}, month = sep, year = {2012}, pages = {613--626}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/88HIRA68/Spitz et Furlong - 2012 - Transcription factors from enhancer binding to de.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/F4CR3GTQ/nrg3207.html:text/html} } @article{geertz_massively_2012, title = {Massively parallel measurements of molecular interaction kinetics on a microfluidic platform}, volume = {109}, issn = {0027-8424, 1091-6490}, url = {http://www.pnas.org/content/109/41/16540}, doi = {10.1073/pnas.1206011109}, abstract = {Quantitative biology requires quantitative data. No high-throughput technologies exist capable of obtaining several hundred independent kinetic binding measurements in a single experiment. We present an integrated microfluidic device (k-MITOMI) for the simultaneous kinetic characterization of 768 biomolecular interactions. We applied k-MITOMI to the kinetic analysis of transcription factor (TF)—DNA interactions, measuring the detailed kinetic landscapes of the mouse TF Zif268, and the yeast TFs Tye7p, Yox1p, and Tbf1p. We demonstrated the integrated nature of k-MITOMI by expressing, purifying, and characterizing 27 additional yeast transcription factors in parallel on a single device. 
Overall, we obtained 2,388 association and dissociation curves of 223 unique molecular interactions with equilibrium dissociation constants ranging from 2 × 10$^{-6}$ M to 2 × 10$^{-9}$ M, and dissociation rate constants of approximately 6 s$^{-1}$ to 8.5 × 10$^{-3}$ s$^{-1}$. Association rate constants were uniform across 3 TF families, ranging from 3.7 × 10$^{6}$ M$^{-1}$ s$^{-1}$ to 9.6 × 10$^{7}$ M$^{-1}$ s$^{-1}$, and are well below the diffusion limit. We expect that k-MITOMI will contribute to our quantitative understanding of biological systems and accelerate the development and characterization of engineered systems.}, language = {en}, number = {41}, urldate = {2016-01-21}, journal = {Proceedings of the National Academy of Sciences}, author = {Geertz, Marcel and Shore, David and Maerkl, Sebastian J.}, month = oct, year = {2012}, pmid = {23012409}, keywords = {biochemistry, systems biology, biophysics}, pages = {16540--16545}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4MAQWT2T/Geertz et al. - 2012 - Massively parallel measurements of molecular inter.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8PQQMFX4/16540.html:text/html} } @article{maerkl_systems_2007, title = {A {Systems} {Approach} to {Measuring} the {Binding} {Energy} {Landscapes} of {Transcription} {Factors}}, volume = {315}, issn = {0036-8075, 1095-9203}, url = {http://classic.sciencemag.org/content/315/5809/233}, doi = {10.1126/science.1131007}, abstract = {A major goal of systems biology is to predict the function of biological networks. Although network topologies have been successfully determined in many cases, the quantitative parameters governing these networks generally have not. Measuring affinities of molecular interactions in high-throughput format remains problematic, especially for transient and low-affinity interactions.
We describe a high-throughput microfluidic platform that measures such properties on the basis of mechanical trapping of molecular interactions. With this platform we characterized DNA binding energy landscapes for four eukaryotic transcription factors; these landscapes were used to test basic assumptions about transcription factor binding and to predict their in vivo function.}, language = {en}, number = {5809}, urldate = {2016-01-21}, journal = {Science}, author = {Maerkl, Sebastian J. and Quake, Stephen R.}, month = jan, year = {2007}, pmid = {17218526}, pages = {233--237}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RZ935GZR/Maerkl et Quake - 2007 - A Systems Approach to Measuring the Binding Energy.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XNNSJWSC/233.html:text/html} } @article{nelson_microfluidic_2013, title = {Microfluidic affinity and {ChIP}-seq analyses converge on a conserved {FOXP}2-binding motif in chimp and human, which enables the detection of evolutionarily novel targets}, volume = {41}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/41/12/5991}, doi = {10.1093/nar/gkt259}, abstract = {The transcription factor forkhead box P2 (FOXP2) is believed to be important in the evolution of human speech. A mutation in its DNA-binding domain causes severe speech impairment. Humans have acquired two coding changes relative to the conserved mammalian sequence. Despite intense interest in FOXP2, it has remained an open question whether the human protein’s DNA-binding specificity and chromatin localization are conserved. Previous in vitro and ChIP-chip studies have provided conflicting consensus sequences for the FOXP2-binding site. Using MITOMI 2.0 microfluidic affinity assays, we describe the binding site of FOXP2 and its affinity profile in base-specific detail for all substitutions of the strongest binding site. 
We find that human and chimp FOXP2 have similar binding sites that are distinct from previously suggested consensus binding sites. Additionally, through analysis of FOXP2 ChIP-seq data from cultured neurons, we find strong overrepresentation of a motif that matches our in vitro results and identifies a set of genes with FOXP2 binding sites. The FOXP2-binding sites tend to be conserved, yet we identified 38 instances of evolutionarily novel sites in humans. Combined, these data present a comprehensive portrait of FOXP2’s-binding properties and imply that although its sequence specificity has been conserved, some of its genomic binding sites are newly evolved.}, language = {en}, number = {12}, urldate = {2016-01-21}, journal = {Nucleic Acids Research}, author = {Nelson, Christopher S. and Fuller, Chris K. and Fordyce, Polly M. and Greninger, Alexander L. and Li, Hao and DeRisi, Joseph L.}, month = jul, year = {2013}, pmid = {23625967}, pages = {5991--6004}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/WF8QCD8N/Nelson et al. - 2013 - Microfluidic affinity and ChIP-seq analyses conver.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7KS9WRW9/5991.html:text/html} } @article{henikoff_epigenome_2011, title = {Epigenome characterization at single base-pair resolution}, volume = {108}, issn = {0027-8424, 1091-6490}, url = {http://www.pnas.org/content/108/45/18318}, doi = {10.1073/pnas.1110731108}, abstract = {We have combined standard micrococcal nuclease (MNase) digestion of nuclei with a modified protocol for constructing paired-end DNA sequencing libraries to map both nucleosomes and subnucleosome-sized particles at single base-pair resolution throughout the budding yeast genome. We found that partially unwrapped nucleosomes and subnucleosome-sized particles can occupy the same position within a cell population, suggesting dynamic behavior. 
By varying the time of MNase digestion, we have been able to observe changes that reflect differential sensitivity of particles, including the eviction of nucleosomes. To characterize DNA-binding features of transcription factors, we plotted the length of each fragment versus its position in the genome, which defined the minimal protected region of each factor. This process led to the precise mapping of protected and exposed regions at and around binding sites, and also determination of the degree to which they are flanked by phased nucleosomes and subnucleosome-sized particles. Our protocol and mapping method provide a general strategy for epigenome characterization, including nucleosome phasing and dynamics, ATP-dependent nucleosome remodelers, and transcription factors, from a single-sequenced sample.}, language = {en}, number = {45}, urldate = {2016-01-27}, journal = {Proceedings of the National Academy of Sciences}, author = {Henikoff, Jorja G. and Belsky, Jason A. and Krassovsky, Kristina and MacAlpine, David M. and Henikoff, Steven}, month = nov, year = {2011}, pmid = {22025700}, keywords = {Saccharomyces cerevisiae, V-plot, transcription factor binding sites}, pages = {18318--18323}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MU77SWCN/Henikoff et al. - 2011 - Epigenome characterization at single base-pair res.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FZPMDDMR/18318.html:text/html} } @article{dingwall_high_1981, title = {High sequence specificity of micrococcal nuclease}, volume = {9}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/9/12/2659}, doi = {10.1093/nar/9.12.2659}, abstract = {The substrate specificity of micrococcal nuclease (EC 3.1.4.7) has been studied. The enzyme recognises features of nucleotide composition, nucleotide sequence and tertiary structure of DNA. 
Kinetic analysis indicates that the rate of cleavage is 30 times greater at the 5′ side of A or T than at G or C. Digestion of end-labelled linear DNA molecules of known sequence revealed that only a limited number of sites are cut, generating a highly specific pattern of fragments. The frequency of cleavage at each site has been determined and it may reflect the poor base overlap in the 5′ T-A 3′ stack as well as the length of contiguous A and T residues. The same sequence preferences are found when DNA is assembled into nucleosomes. Deoxyribonuclease 1 (EC 3.1.4.5.) recognises many of the same sequence features. Micrococcal nuclease also mimics nuclease S1 selectively cleaving an inverted repeat in supercoiled pBR322. The value of micrococcal nuclease as a “non-specific” enzymatic probe for studying nucleosome phasing is questioned.}, language = {en}, number = {12}, urldate = {2016-01-27}, journal = {Nucleic Acids Research}, author = {Dingwall, Colin and Lomonossoff, George P. and Laskey, Ronald A.}, month = jun, year = {1981}, pmid = {6269057}, pages = {2659--2674}, file = {Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ZP7H67F3/2659.html:text/html} } @article{jiang_nucleosome_2009, title = {Nucleosome positioning and gene regulation: advances through genomics}, volume = {10}, copyright = {© 2009 Nature Publishing Group}, issn = {1471-0056}, shorttitle = {Nucleosome positioning and gene regulation}, url = {http://www.nature.com/nrg/journal/v10/n3/full/nrg2522.html}, doi = {10.1038/nrg2522}, abstract = {Knowing the precise locations of nucleosomes in a genome is key to understanding how genes are regulated. Recent 'next generation' ChIP–chip and ChIP–Seq technologies have accelerated our understanding of the basic principles of chromatin organization. 
Here we discuss what high-resolution genome-wide maps of nucleosome positions have taught us about how nucleosome positioning demarcates promoter regions and transcriptional start sites, and how the composition and structure of promoter nucleosomes facilitate or inhibit transcription. A detailed picture is starting to emerge of how diverse factors, including underlying DNA sequences and chromatin remodelling complexes, influence nucleosome positioning.}, language = {en}, number = {3}, urldate = {2016-01-28}, journal = {Nature Reviews Genetics}, author = {Jiang, Cizhong and Pugh, B. Franklin}, month = mar, year = {2009}, pages = {161--172}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/G8FIE4U4/Jiang et Pugh - 2009 - Nucleosome positioning and gene regulation advanc.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FGDCABJF/nrg2522.html:text/html} } @article{horz_sequence_1981, title = {Sequence specific cleavage of {DNA} by micrococcal nuclease}, volume = {9}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/9/12/2643}, doi = {10.1093/nar/9.12.2643}, abstract = {Micrococcal nuclease is shown to cleave DNA under conditions of partial digestion in a specific manner. Sequences of the type 5′CATA and 5′CTA are attacked preferentially, followed by exonucleolytic degradation at the newly generated DNA termini. GC-rich flanking sequences further increase the probability of initial attack. Unexpectedly, long stretches containing only A and T are spared by the nuclease. These results, which were obtained with mouse satellite DNA and two fragments from the plasmid pBR322, do not support the previous contention that it is the regions of high AT-content which are initially cleaved by micrococcal nuclease.
This specificity of micrococcal nuclease complicates its use in experiments intended to monitor the nucleoprotein structure of a DNA sequence in chromatin.}, language = {en}, number = {12}, urldate = {2016-01-27}, journal = {Nucleic Acids Research}, author = {Hörz, Wolfram and Altenburger, Werner}, month = jun, year = {1981}, pmid = {7279658}, pages = {2643--2658}, file = {Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MBQQBQZW/2643.html:text/html} } @article{kaplan_contribution_2010, title = {Contribution of histone sequence preferences to nucleosome organization: proposed definitions and methodology}, volume = {11}, copyright = {2010 BioMed Central Ltd}, issn = {1465-6906}, shorttitle = {Contribution of histone sequence preferences to nucleosome organization}, url = {http://genomebiology.com/2010/11/11/140/abstract}, doi = {10.1186/gb-2010-11-11-140}, abstract = {We propose definitions and procedures for comparing nucleosome maps and discuss current agreement and disagreement on the effect of histone sequence preferences on nucleosome organization in vivo.}, language = {en}, number = {11}, urldate = {2016-01-27}, journal = {Genome Biology}, author = {Kaplan, Noam and Hughes, Timothy R. and Lieb, Jason D. and Widom, Jonathan and Segal, Eran}, month = nov, year = {2010}, pmid = {21118582}, pages = {140}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8QTUCP9V/Kaplan et al. 
- 2010 - Contribution of histone sequence preferences to nu.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/37AT8SVN/140.html:text/html} } @article{sung_dnase_2014, title = {{DNase} {Footprint} {Signatures} {Are} {Dictated} by {Factor} {Dynamics} and {DNA} {Sequence}}, volume = {56}, issn = {1097-2765}, url = {http://www.cell.com/article/S1097276514006716/abstract}, doi = {10.1016/j.molcel.2014.08.016}, abstract = {Genomic footprinting has emerged as an unbiased discovery method for transcription factor (TF) occupancy at cognate DNA in vivo. A basic premise of footprinting is that sequence-specific TF-DNA interactions are associated with localized resistance to nucleases, leaving observable signatures of cleavage within accessible chromatin. This phenomenon is interpreted to imply protection of the critical nucleotides by the stably bound protein factor. However, this model conflicts with previous reports of many TFs exchanging with specific binding sites in living cells on a timescale of seconds. We show that TFs with short DNA residence times have no footprints at bound motif elements. Moreover, the nuclease cleavage profile within a footprint originates from the DNA sequence in the factor-binding site, rather than from the protein occupying specific nucleotides. These findings suggest a revised understanding of TF footprinting and reveal limitations in comprehensive reconstruction of the TF regulatory network using this approach.}, language = {English}, number = {2}, urldate = {2016-01-27}, journal = {Molecular Cell}, author = {Sung, Myong-Hee and Guertin, Michael J. and Baek, Songjoon and Hager, Gordon L.}, month = oct, year = {2014}, pmid = {25242143}, pages = {275--285}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XHCZQETM/Sung et al. 
- 2014 - DNase Footprint Signatures Are Dictated by Factor .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/W2QU5DXX/S1097-2765(14)00671-6.html:text/html} } @article{chen_improved_2014, title = {Improved nucleosome-positioning algorithm {iNPS} for accurate nucleosome positioning from sequencing data}, volume = {5}, copyright = {© 2014 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, url = {http://www.nature.com/ncomms/2014/140918/ncomms5909/full/ncomms5909.html}, doi = {10.1038/ncomms5909}, abstract = {Accurate determination of genome-wide nucleosome positioning can provide important insights into global gene regulation. Here, we describe the development of an improved nucleosome-positioning algorithm—iNPS—which achieves significantly better performance than the widely used NPS package. By determining nucleosome boundaries more precisely and merging or separating shoulder peaks based on local MNase-seq signals, iNPS can unambiguously detect 60\% more nucleosomes. The detected nucleosomes display better nucleosome ‘widths’ and neighbouring centre–centre distance distributions, giving rise to sharper patterns and better phasing of average nucleosome profiles and higher consistency between independent data subsets. In addition to its unique advantage in classifying nucleosomes by shape to reveal their different biological properties, iNPS also achieves higher significance and lower false positive rates than previously published methods. The application of iNPS to T-cell activation data demonstrates a greater ability to facilitate detection of nucleosome repositioning, uncovering additional biological features underlying the activation process.}, language = {en}, urldate = {2016-01-27}, journal = {Nature Communications}, author = {Chen, Weizhong and Liu, Yi and Zhu, Shanshan and Green, Christopher D.
and Wei, Gang and Han, Jing-Dong Jackie}, month = sep, year = {2014}, keywords = {Biological sciences, bioinformatics}, pages = {4909}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ICZIKCZR/Chen et al. - 2014 - Improved nucleosome-positioning algorithm iNPS for.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/AAARM58C/ncomms5909.html:text/html} } @article{weirauch_determination_2014, title = {Determination and {Inference} of {Eukaryotic} {Transcription} {Factor} {Sequence} {Specificity}}, volume = {158}, issn = {0092-8674}, url = {http://www.sciencedirect.com/science/article/pii/S0092867414010368}, doi = {10.1016/j.cell.2014.08.009}, abstract = {Summary Transcription factor (TF) DNA sequence preferences direct their regulatory activity, but are currently known for only ∼1\% of eukaryotic TFs. Broadly sampling DNA-binding domain (DBD) types from multiple eukaryotic clades, we determined DNA sequence preferences for {\textgreater}1,000 TFs encompassing 54 different DBD classes from 131 diverse eukaryotes. We find that closely related DBDs almost always have very similar DNA sequence preferences, enabling inference of motifs for ∼34\% of the ∼170,000 known or predicted eukaryotic TFs. Sequences matching both measured and inferred motifs are enriched in chromatin immunoprecipitation sequencing (ChIP-seq) peaks and upstream of transcription start sites in diverse eukaryotic lineages. SNPs defining expression quantitative trait loci in Arabidopsis promoters are also enriched for predicted TF binding sites. Importantly, our motif “library” can be used to identify specific TFs whose binding may be altered by human disease risk alleles. These data present a powerful resource for mapping transcriptional networks across eukaryotes.}, number = {6}, urldate = {2016-01-27}, journal = {Cell}, author = {Weirauch, Matthew T. and Yang, Ally and Albu, Mihai and Cote, Atina G.
and Montenegro-Montero, Alejandro and Drewe, Philipp and Najafabadi, Hamed S. and Lambert, Samuel A. and Mann, Ishminder and Cook, Kate and Zheng, Hong and Goity, Alejandra and van Bakel, Harm and Lozano, Jean-Claude and Galli, Mary and Lewsey, Mathew G. and Huang, Eryong and Mukherjee, Tuhin and Chen, Xiaoting and Reece-Hoyes, John S. and Govindarajan, Sridhar and Shaulsky, Gad and Walhout, Albertha J. M. and Bouget, François-Yves and Ratsch, Gunnar and Larrondo, Luis F. and Ecker, Joseph R. and Hughes, Timothy R.}, month = sep, year = {2014}, pages = {1431--1443}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5XR4BSK6/Weirauch et al. - 2014 - Determination and Inference of Eukaryotic Transcri.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/B6FAA9XX/S0092867414010368.html:text/html} } @article{sherwood_discovery_2014, title = {Discovery of directional and nondirectional pioneer transcription factors by modeling {DNase} profile magnitude and shape}, volume = {32}, copyright = {© 2013 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1087-0156}, url = {http://www.nature.com/nbt/journal/v32/n2/full/nbt.2798.html}, doi = {10.1038/nbt.2798}, abstract = {We describe protein interaction quantitation (PIQ), a computational method for modeling the magnitude and shape of genome-wide DNase I hypersensitivity profiles to identify transcription factor (TF) binding sites. Through the use of machine-learning techniques, PIQ identified binding sites for {\textgreater}700 TFs from one DNase I hypersensitivity analysis followed by sequencing (DNase-seq) experiment with accuracy comparable to that of chromatin immunoprecipitation followed by sequencing (ChIP-seq). We applied PIQ to analyze DNase-seq data from mouse embryonic stem cells differentiating into prepancreatic and intestinal endoderm. 
We identified 120 and experimentally validated eight 'pioneer' TF families that dynamically open chromatin. Four pioneer TF families only opened chromatin in one direction from their motifs. Furthermore, we identified 'settler' TFs whose genomic binding is principally governed by proximity to open chromatin. Our results support a model of hierarchical TF binding in which directional and nondirectional pioneer activity shapes the chromatin landscape for population by settler TFs.}, language = {en}, number = {2}, urldate = {2016-02-03}, journal = {Nature Biotechnology}, author = {Sherwood, Richard I. and Hashimoto, Tatsunori and O'Donnell, Charles W. and Lewis, Sophia and Barkal, Amira A. and van Hoff, John Peter and Karun, Vivek and Jaakkola, Tommi and Gifford, David K.}, month = feb, year = {2014}, keywords = {Embryonic stem cells, Genome informatics, Machine learning, Regulatory networks}, pages = {171--178}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/85GIH6AN/Sherwood et al. - 2014 - Discovery of directional and nondirectional pionee.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/B9U6VK4P/nbt.2798.html:text/html} } @article{kaplan_dna-encoded_2009, title = {The {DNA}-encoded nucleosome organization of a eukaryotic genome}, volume = {458}, copyright = {© 2008 Nature Publishing Group}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v458/n7236/full/nature07667.html}, doi = {10.1038/nature07667}, abstract = {Nucleosome organization is critical for gene regulation. In living cells this organization is determined by multiple factors, including the action of chromatin remodellers, competition with site-specific DNA-binding proteins, and the DNA sequence preferences of the nucleosomes themselves. 
However, it has been difficult to estimate the relative importance of each of these mechanisms in vivo, because in vivo nucleosome maps reflect the combined action of all influencing factors. Here we determine the importance of nucleosome DNA sequence preferences experimentally by measuring the genome-wide occupancy of nucleosomes assembled on purified yeast genomic DNA. The resulting map, in which nucleosome occupancy is governed only by the intrinsic sequence preferences of nucleosomes, is similar to in vivo nucleosome maps generated in three different growth conditions. In vitro, nucleosome depletion is evident at many transcription factor binding sites and around gene start and end sites, indicating that nucleosome depletion at these sites in vivo is partly encoded in the genome. We confirm these results with a micrococcal nuclease-independent experiment that measures the relative affinity of nucleosomes for {\textasciitilde}40,000 double-stranded 150-base-pair oligonucleotides. Using our in vitro data, we devise a computational model of nucleosome sequence preferences that is significantly correlated with in vivo nucleosome occupancy in Caenorhabditis elegans. Our results indicate that the intrinsic DNA sequence preferences of nucleosomes have a central role in determining the organization of nucleosomes in vivo.}, language = {en}, number = {7236}, urldate = {2016-02-23}, journal = {Nature}, author = {Kaplan, Noam and Moore, Irene K. and Fondufe-Mittendorf, Yvonne and Gossett, Andrea J. and Tillo, Desiree and Field, Yair and LeProust, Emily M. and Hughes, Timothy R. and Lieb, Jason D. and Widom, Jonathan and Segal, Eran}, month = mar, year = {2009}, pages = {362--366}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4KX9KHHJ/Kaplan et al. 
- 2009 - The DNA-encoded nucleosome organization of a eukar.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XI3XE3IK/nature07667.html:text/html} } @article{jolma_dna-binding_2013, title = {{DNA}-{Binding} {Specificities} of {Human} {Transcription} {Factors}}, volume = {152}, issn = {0092-8674}, url = {http://www.sciencedirect.com/science/article/pii/S0092867412014961}, doi = {10.1016/j.cell.2012.12.009}, abstract = {Summary Although the proteins that read the gene regulatory code, transcription factors (TFs), have been largely identified, it is not well known which sequences TFs can recognize. We have analyzed the sequence-specific binding of human TFs using high-throughput SELEX and ChIP sequencing. A total of 830 binding profiles were obtained, describing 239 distinctly different binding specificities. The models represent the majority of human TFs, approximately doubling the coverage compared to existing systematic studies. Our results reveal additional specificity determinants for a large number of factors for which a partial specificity was known, including a commonly observed A- or T-rich stretch that flanks the core motifs. Global analysis of the data revealed that homodimer orientation and spacing preferences, and base-stacking interactions, have a larger role in TF-DNA binding than previously appreciated. We further describe a binding model incorporating these features that is required to understand binding of TFs to DNA.}, number = {1–2}, urldate = {2016-02-29}, journal = {Cell}, author = {Jolma, Arttu and Yan, Jian and Whitington, Thomas and Toivonen, Jarkko and Nitta, Kazuhiro R. and Rastas, Pasi and Morgunova, Ekaterina and Enge, Martin and Taipale, Mikko and Wei, Gonghong and Palin, Kimmo and Vaquerizas, Juan M. and Vincentelli, Renaud and Luscombe, Nicholas M. and Hughes, Timothy R. 
and Lemaire, Patrick and Ukkonen, Esko and Kivioja, Teemu and Taipale, Jussi}, month = jan, year = {2013}, pages = {327--339}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7EIKAUV2/Jolma et al. - 2013 - DNA-Binding Specificities of Human Transcription F.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ECTCAT9X/S0092867412014961.html:text/html} } @article{duttke_human_2015, title = {Human {Promoters} {Are} {Intrinsically} {Directional}}, volume = {57}, issn = {1097-2765}, url = {http://www.sciencedirect.com/science/article/pii/S1097276514010077}, doi = {10.1016/j.molcel.2014.12.029}, abstract = {Summary Divergent transcription, in which reverse-oriented transcripts occur upstream of eukaryotic promoters in regions devoid of annotated genes, has been suggested to be a general property of active promoters. Here we show that the human basal RNA polymerase II transcriptional machinery and core promoter are inherently unidirectional and that reverse-oriented transcripts originate from their own cognate reverse-directed core promoters. In vitro transcription analysis and mapping of nascent transcripts in HeLa cells revealed that sequences at reverse start sites are similar to those of their forward counterparts. The use of DNase I accessibility to define proximal promoter borders revealed that about half of promoters are unidirectional and that unidirectional promoters are depleted at their upstream edges of reverse core promoter sequences and their associated chromatin features. Divergent transcription is thus not an inherent property of the transcription process but rather the consequence of the presence of both forward- and reverse-directed core promoters.}, number = {4}, urldate = {2016-03-01}, journal = {Molecular Cell}, author = {Duttke, Sascha H. C. and Lacadie, Scott A. and Ibrahim, Mahmoud M. and Glass, Christopher K. and Corcoran, David L. 
and Benner, Christopher and Heinz, Sven and Kadonaga, James T. and Ohler, Uwe}, month = feb, year = {2015}, pages = {674--684}, file = {ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4FF6Q3EJ/S1097276514010077\$.html:text/html} } @article{badis_library_2008, title = {A {Library} of {Yeast} {Transcription} {Factor} {Motifs} {Reveals} a {Widespread} {Function} for {Rsc}3 in {Targeting} {Nucleosome} {Exclusion} at {Promoters}}, volume = {32}, issn = {1097-2765}, url = {http://www.sciencedirect.com/science/article/pii/S1097276508008423}, doi = {10.1016/j.molcel.2008.11.020}, abstract = {Summary The sequence specificity of DNA-binding proteins is the primary mechanism by which the cell recognizes genomic features. Here, we describe systematic determination of yeast transcription factor DNA-binding specificities. We obtained binding specificities for 112 DNA-binding proteins representing 19 distinct structural classes. One-third of the binding specificities have not been previously reported. Several binding sequences have striking genomic distributions relative to transcription start sites, supporting their biological relevance and suggesting a role in promoter architecture. Among these are Rsc3 binding sequences, containing the core CGCG, which are found preferentially ∼100 bp upstream of transcription start sites. Mutation of RSC3 results in a dramatic increase in nucleosome occupancy in hundreds of proximal promoters containing a Rsc3 binding element, but has little impact on promoters lacking Rsc3 binding sequences, indicating that Rsc3 plays a broad role in targeting nucleosome exclusion at yeast promoters.}, number = {6}, urldate = {2016-08-09}, journal = {Molecular Cell}, author = {Badis, Gwenael and Chan, Esther T. and van Bakel, Harm and Pena-Castillo, Lourdes and Tillo, Desiree and Tsui, Kyle and Carlson, Clayton D. and Gossett, Andrea J. and Hasinoff, Michael J. and Warren, Christopher L. 
and Gebbia, Marinella and Talukder, Shaheynoor and Yang, Ally and Mnaimneh, Sanie and Terterov, Dimitri and Coburn, David and Li Yeo, Ai and Yeo, Zhen Xuan and Clarke, Neil D. and Lieb, Jason D. and Ansari, Aseem Z. and Nislow, Corey and Hughes, Timothy R.}, month = dec, year = {2008}, keywords = {DNA, PROTEINS}, pages = {878--887}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3827EM9J/Badis et al. - 2008 - A Library of Yeast Transcription Factor Motifs Rev.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8UHEJ37M/S1097276508008423.html:text/html} } @book{jolma_methods_2011, series = {Subcellular {Biochemistry}}, title = {Methods for {Analysis} of {Transcription} {Factor} {DNA}-{Binding} {Specificity} {In} {Vitro}}, copyright = {©2011 Springer Science+Business Media B.V.}, isbn = {978-90-481-9068-3 978-90-481-9069-0}, url = {http://link.springer.com/chapter/10.1007/978-90-481-9069-0_7}, abstract = {Transcription of genes during development and in response to environmental stimuli is determined by genomic DNA sequence. The DNA sequences regulating transcription are read by sequence-specific transcription factors (TFs) that recognize relatively short sequences, generally between four and twenty base pairs in length. Transcriptional regulation generally requires binding of multiple TFs in close proximity to each other. Mechanistic understanding of transcription in an organism thus requires detailed knowledge of binding affinities of all its TFs to all possible DNA sequences, and the co–operative interactions between the TFs. 
However, very little is known about such co-operative binding interactions, and even the simple TF-DNA binding information exists only for a very small proportion of all TFs – for example, mammals have approximately 1,300–2,000 TFs [1, 2], yet the largest public databases for TF binding specificity, Jaspar and Uniprobe [3, 4] currently list only approximately 500 moderate to high resolution profiles for human or mouse. This lack of knowledge is in part due to the fact that analysis of TF DNA binding has been laborious and expensive. In this chapter, we review methods that can be used to determine binding specificity of TFs to DNA, mainly focusing on recently developed assays that allow high-resolution analysis of TF binding specificity in relatively high throughput.}, language = {en}, number = {52}, urldate = {2016-03-09}, publisher = {Springer Netherlands}, author = {Jolma, Arttu and Taipale, Jussi}, editor = {Hughes, Timothy R.}, year = {2011}, doi = {10.1007/978-90-481-9069-0_7}, - keywords = {Affinity, Biochemistry, general, Biomedicine general, Cell Biology, Co-operative binding, Nucleic Acid Chemistry, Protein binding microarrays, Protein–DNA interactions, SELEX}, + keywords = {SELEX, Biomedicine general, Biochemistry, general, Nucleic Acid Chemistry, Cell Biology, Protein–DNA interactions, Co-operative binding, Affinity, Protein binding microarrays}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/N3SWG4KM/Jolma et Taipale - 2011 - Methods for Analysis of Transcription Factor DNA-B.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/K2P3P7G7/10.html:text/html} } @article{stormo_specificity_1998, title = {Specificity, free energy and information content in protein–{DNA} interactions}, volume = {23}, issn = {0968-0004}, url = {http://www.sciencedirect.com/science/article/pii/S0968000498011876}, doi = {10.1016/S0968-0004(98)01187-6}, abstract = {Site-specific DNA–protein interactions can be 
studied using experimental and computational methods. Experimental approaches typically analyze a protein–DNA interaction by measuring the free energy of binding under a variety of conditions. Computational methods focus on alignments of known binding sites for a protein, and, from these alignments, make estimates of the binding energy. Understanding the relationship between these two perspectives, and finding ways to improve both, is a major challenge of modern molecular biology.}, number = {3}, urldate = {2016-03-11}, journal = {Trends in Biochemical Sciences}, author = {Stormo, Gary D. and Fields, Dana S.}, month = mar, year = {1998}, keywords = {protein-DNA interaction, free energy of binding, equilibrium binding constant}, pages = {109--113}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HF85TI4K/Stormo et Fields - 1998 - Specificity, free energy and information content i.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/E84QCIUM/S0968000498011876.html:text/html} } @article{zhang_canonical_2014, title = {Canonical nucleosome organization at promoters forms during genome activation}, volume = {24}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/24/2/260}, doi = {10.1101/gr.157750.113}, abstract = {The organization of nucleosomes influences transcriptional activity by controlling accessibility of DNA binding proteins to the genome. Genome-wide nucleosome binding profiles have identified a canonical nucleosome organization at gene promoters, where arrays of well-positioned nucleosomes emanate from nucleosome-depleted regions. The mechanisms of formation and the function of canonical promoter nucleosome organization remain unclear. Here we analyze the genome-wide location of nucleosomes during zebrafish embryogenesis and show that well-positioned nucleosome arrays appear on thousands of promoters during the activation of the zygotic genome. 
The formation of canonical promoter nucleosome organization is independent of DNA sequence preference, transcriptional elongation, and robust RNA polymerase II (Pol II) binding. Instead, canonical promoter nucleosome organization correlates with the presence of histone H3 lysine 4 trimethylation (H3K4me3) and affects future transcriptional activation. These findings reveal that genome activation is central to the organization of nucleosome arrays during early embryogenesis.}, language = {en}, number = {2}, urldate = {2016-03-16}, journal = {Genome Research}, author = {Zhang, Yong and Vastenhouw, Nadine L. and Feng, Jianxing and Fu, Kai and Wang, Chenfei and Ge, Ying and Pauli, Andrea and Hummelen, Paul van and Schier, Alexander F. and Liu, X. Shirley}, month = feb, year = {2014}, pmid = {24285721}, pages = {260--266}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/WM4I4Q4W/Zhang et al. - 2014 - Canonical nucleosome organization at promoters for.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BPB52I5C/260.html:text/html} } @article{siebert_bayesian_2016, title = {Bayesian {Markov} models consistently outperform {PWMs} at predicting motifs in nucleotide sequences}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/early/2016/06/09/nar.gkw521}, doi = {10.1093/nar/gkw521}, abstract = {Position weight matrices (PWMs) are the standard model for DNA and RNA regulatory motifs. In PWMs nucleotide probabilities are independent of nucleotides at other positions. Models that account for dependencies need many parameters and are prone to overfitting. We have developed a Bayesian approach for motif discovery using Markov models in which conditional probabilities of order k − 1 act as priors for those of order k. This Bayesian Markov model (BaMM) training automatically adapts model complexity to the amount of available data. 
We also derive an EM algorithm for de-novo discovery of enriched motifs. For transcription factor binding, BaMMs achieve significantly (P = 1/16) higher cross-validated partial AUC than PWMs in 97\% of 446 ChIP-seq ENCODE datasets and improve performance by 36\% on average. BaMMs also learn complex multipartite motifs, improving predictions of transcription start sites, polyadenylation sites, bacterial pause sites, and RNA binding sites by 26–101\%. BaMMs never performed worse than PWMs. These robust improvements argue in favour of generally replacing PWMs by BaMMs.}, language = {en}, urldate = {2016-06-15}, journal = {Nucleic Acids Research}, author = {Siebert, Matthias and Söding, Johannes}, month = jun, year = {2016}, pmid = {27288444}, pages = {gkw521}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/WXRQ4RFI/Siebert et Söding - 2016 - Bayesian Markov models consistently outperform PWM.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QRW8B45T/nar.gkw521.html:text/html;Supplemental.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/79AXCN8I/Supplemental.pdf:application/pdf} } @article{gupta_quantifying_2007, title = {Quantifying similarity between motifs}, volume = {8}, issn = {1474-760X}, url = {http://dx.doi.org/10.1186/gb-2007-8-2-r24}, doi = {10.1186/gb-2007-8-2-r24}, abstract = {A common question within the context of de novo motif discovery is whether a newly discovered, putative motif resembles any previously discovered motif in an existing database. To answer this question, we define a statistical measure of motif-motif similarity, and we describe an algorithm, called Tomtom, for searching a database of motifs with a given query motif. Experimental simulations demonstrate the accuracy of Tomtom's E values and its effectiveness in finding similar motifs.}, urldate = {2016-05-17}, journal = {Genome Biology}, author = {Gupta, Shobhit and Stamatoyannopoulos, John A. 
and Bailey, Timothy L. and Noble, William Stafford}, year = {2007}, pages = {R24}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8BIHRUEV/Gupta et al. - 2007 - Quantifying similarity between motifs.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SFIWP7TF/gb-2007-8-2-r24.html:text/html} } @article{alipanahi_predicting_2015, title = {Predicting the sequence specificities of {DNA}- and {RNA}-binding proteins by deep learning}, volume = {33}, copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1087-0156}, url = {http://www.nature.com/nbt/journal/v33/n8/full/nbt.3300.html}, doi = {10.1038/nbt.3300}, abstract = {Knowing the sequence specificities of DNA- and RNA-binding proteins is essential for developing models of the regulatory processes in biological systems and for identifying causal disease variants. Here we show that sequence specificities can be ascertained from experimental data with 'deep learning' techniques, which offer a scalable, flexible and unified computational approach for pattern discovery. Using a diverse array of experimental data and evaluation metrics, we find that deep learning outperforms other state-of-the-art methods, even when training on in vitro data and testing on in vivo data. We call this approach DeepBind and have built a stand-alone software tool that is fully automatic and handles millions of sequences per experiment. Specificities determined by DeepBind are readily visualized as a weighted ensemble of position weight matrices or as a 'mutation map' that indicates how variations affect binding within a specific sequence.}, language = {en}, number = {8}, urldate = {2016-06-15}, journal = {Nature Biotechnology}, author = {Alipanahi, Babak and Delong, Andrew and Weirauch, Matthew T. 
and Frey, Brendan J.}, month = aug, year = {2015}, keywords = {Genome informatics, Computational biology and bioinformatics, Gene regulatory networks}, pages = {831--838}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GWV3VGUD/Alipanahi et al. - 2015 - Predicting the sequence specificities of DNA- and .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GDJRXKTR/nbt.3300.html:text/html} } @article{kumar_predicting_2016, title = {Predicting transcription factor site occupancy using {DNA} sequence intrinsic and cell-type specific chromatin features}, volume = {17}, issn = {1471-2105}, url = {http://dx.doi.org/10.1186/s12859-015-0846-z}, doi = {10.1186/s12859-015-0846-z}, abstract = {Understanding the mechanisms by which transcription factors (TF) are recruited to their physiological target sites is crucial for understanding gene regulation. DNA sequence intrinsic features such as predicted binding affinity are often not very effective in predicting in vivo site occupancy and in any case could not explain cell-type specific binding events. Recent reports show that chromatin accessibility, nucleosome occupancy and specific histone post-translational modifications greatly influence TF site occupancy in vivo. 
In this work, we use machine-learning methods to build predictive models and assess the relative importance of different sequence-intrinsic and chromatin features in the TF-to-target-site recruitment process.}, number = {1}, urldate = {2016-06-15}, journal = {BMC Bioinformatics}, author = {Kumar, Sunil and Bucher, Philipp}, year = {2016}, pages = {41--50}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/M45S496M/Kumar et Bucher - 2016 - Predicting transcription factor site occupancy usi.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/M9Q6XTG4/s12859-015-0846-z.html:text/html} } @article{sharon_feature-based_2008, title = {A {Feature}-{Based} {Approach} to {Modeling} {Protein}–{DNA} {Interactions}}, volume = {4}, issn = {1553-7358}, url = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000154}, doi = {10.1371/journal.pcbi.1000154}, abstract = {Author Summary Transcription factor (TF) protein binding to its DNA target sequences is a fundamental physical interaction underlying gene regulation. Characterizing the binding specificities of TFs is essential for deducing which genes are regulated by which TFs. Recently, several high-throughput methods that measure sequences enriched for TF targets genomewide were developed. Since TFs recognize relatively short sequences, much effort has been directed at developing computational methods that identify enriched subsequences (motifs) from these sequences. However, little effort has been directed towards improving the representation of motifs. Practically, available motif finding software use the position specific scoring matrix (PSSM) model, which assumes independence between different motif positions. We present an alternative, richer model, called the feature motif model (FMM), that enables the representation of a variety of sequence features and captures dependencies that exist between binding site positions. 
We show how FMMs explain TF binding data better than PSSMs on both synthetic and real data. We also present a motif finder algorithm that learns FMM motifs from unaligned promoter sequences and show how de novo FMMs, learned from binding data of the human TFs c-Myc and CTCF, reveal intriguing insights about their binding specificities.}, number = {8}, urldate = {2016-06-16}, journal = {PLOS Comput Biol}, author = {Sharon, Eilon and Lubliner, Shai and Segal, Eran}, year = {2008}, keywords = {Transcription Factors, Sequence motif analysis, Sequence alignment, Algorithms, Markov models, Network motifs, Probability distribution, DNA-binding proteins}, pages = {e1000154}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/K9B28TX5/Sharon et al. - 2008 - A Feature-Based Approach to Modeling Protein–DNA I.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/H3U7TP6B/article.html:text/html} } @article{tsai_contribution_2015, title = {Contribution of {Sequence} {Motif}, {Chromatin} {State}, and {DNA} {Structure} {Features} to {Predictive} {Models} of {Transcription} {Factor} {Binding} in {Yeast}}, volume = {11}, issn = {1553-7358}, url = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004418}, doi = {10.1371/journal.pcbi.1004418}, abstract = {Author Summary Identification of transcription factor binding sites based on sequence motifs is typically accompanied by a high false positive rate. Increasing evidence suggests that there are many other factors besides DNA sequence that may affect the binding and interaction of TFs with DNA. Through the integration of sequence motif, chromatin state, and DNA structure properties, we show that TF binding can be better predicted. Moreover, considering chromatin state and DNA structure properties simultaneously yields a significant improvement. 
While the binding of some TFs can be readily predicted using either chromatin state information or DNA structure, other TFs need both. Thus, our findings provide insights on how different histone modifications and DNA structure properties may influence the binding of a particular TF and thus how TFs regulate gene expression. These features are referred to as sequence “intrinsic properties” because they can be predicted from sequences alone. These intrinsic properties can be used to build a TF binding prediction model that has a similar performance to considering all features. Moreover, the intrinsic property model allows TFBS predictions not only across TFs, but also across DNA-binding domain families that are present in most eukaryotes, suggesting that the model likely can be used across species.}, number = {8}, urldate = {2016-06-16}, journal = {PLOS Comput Biol}, author = {Tsai, Zing Tsung-Yeh and Shiu, Shin-Han and Tsai, Huai-Kuang}, year = {2015}, keywords = {Chromatin, Sequence motif analysis, Forecasting, DNA structure, Nucleosomes, Gene expression, Histones, DNA sequence analysis}, pages = {e1004418}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UZW539AW/Tsai et al. - 2015 - Contribution of Sequence Motif, Chromatin State, a.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RNZ4GZS8/article.html:text/html} } @article{sunnaker_approximate_2013, title = {Approximate {Bayesian} {Computation}}, volume = {9}, issn = {1553-7358}, url = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1002803}, doi = {10.1371/journal.pcbi.1002803}, abstract = {Approximate Bayesian computation (ABC) constitutes a class of computational methods rooted in Bayesian statistics . 
In all model-based statistical inference , the likelihood function is of central importance, since it expresses the probability of the observed data under a particular statistical model, and thus quantifies the support data lend to particular values of parameters and to choices among different models. For simple models, an analytical formula for the likelihood function can typically be derived. However, for more complex models, an analytical formula might be elusive or the likelihood function might be computationally very costly to evaluate. ABC methods bypass the evaluation of the likelihood function. In this way, ABC methods widen the realm of models for which statistical inference can be considered. ABC methods are mathematically well-founded, but they inevitably make assumptions and approximations whose impact needs to be carefully assessed. Furthermore, the wider application domain of ABC exacerbates the challenges of parameter estimation and model selection . ABC has rapidly gained popularity over the last years and in particular for the analysis of complex problems arising in biological sciences (e.g., in population genetics , ecology , epidemiology , and systems biology ).}, number = {1}, urldate = {2016-06-24}, journal = {PLOS Comput Biol}, author = {Sunnåker, Mikael and Busetto, Alberto Giovanni and Numminen, Elina and Corander, Jukka and Foll, Matthieu and Dessimoz, Christophe}, month = jan, year = {2013}, keywords = {Algorithms, Computer software, Simulation and modeling, Statistical inference, Quality control, Statistical distributions, Statistical models, Population genetics}, pages = {e1002803}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ACGRZCFG/Sunnåker et al. 
- 2013 - Approximate Bayesian Computation.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/F3PCJQ2X/article.html:text/html} } @article{cheng_understanding_2012, title = {Understanding transcriptional regulation by integrative analysis of transcription factor binding data}, volume = {22}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/22/9/1658}, doi = {10.1101/gr.136838.111}, abstract = {Statistical models have been used to quantify the relationship between gene expression and transcription factor (TF) binding signals. Here we apply the models to the large-scale data generated by the ENCODE project to study transcriptional regulation by TFs. Our results reveal a notable difference in the prediction accuracy of expression levels of transcription start sites (TSSs) captured by different technologies and RNA extraction protocols. In general, the expression levels of TSSs with high CpG content are more predictable than those with low CpG content. For genes with alternative TSSs, the expression levels of downstream TSSs are more predictable than those of the upstream ones. Different TF categories and specific TFs vary substantially in their contributions to predicting expression. Between two cell lines, the differential expression of TSS can be precisely reflected by the difference of TF-binding signals in a quantitative manner, arguing against the conventional on-and-off model of TF binding. Finally, we explore the relationships between TF-binding signals and other chromatin features such as histone modifications and DNase hypersensitivity for determining expression. The models imply that these features regulate transcription in a highly coordinated manner.}, language = {en}, number = {9}, urldate = {2016-07-07}, journal = {Genome Research}, author = {Cheng, Chao and Alexander, Roger and Min, Renqiang and Leng, Jing and Yip, Kevin Y. 
and Rozowsky, Joel and Yan, Koon-Kiu and Dong, Xianjun and Djebali, Sarah and Ruan, Yijun and Davis, Carrie A. and Carninci, Piero and Lassman, Timo and Gingeras, Thomas R. and Guigó, Roderic and Birney, Ewan and Weng, Zhiping and Snyder, Michael and Gerstein, Mark}, month = sep, year = {2012}, pmid = {22955978}, pages = {1658--1667}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8PDJPINK/Cheng et al. - 2012 - Understanding transcriptional regulation by integr.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QUK6KTKM/1658.html:text/html;Supplemental_Figures_and_Tables.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6WQC2A9C/Supplemental_Figures_and_Tables.pdf:application/pdf} } @article{gordan_distinguishing_2009, title = {Distinguishing direct versus indirect transcription factor–{DNA} interactions}, volume = {19}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/19/11/2090}, doi = {10.1101/gr.094144.109}, abstract = {Transcriptional regulation is largely enacted by transcription factors (TFs) binding DNA. Large numbers of TF binding motifs have been revealed by ChIP-chip experiments followed by computational DNA motif discovery. However, the success of motif discovery algorithms has been limited when applied to sequences bound in vivo (such as those identified by ChIP-chip) because the observed TF–DNA interactions are not necessarily direct: Some TFs predominantly associate with DNA indirectly through protein partners, while others exhibit both direct and indirect binding. Here, we present the first method for distinguishing between direct and indirect TF–DNA interactions, integrating in vivo TF binding data, in vivo nucleosome occupancy data, and motifs from in vitro protein binding microarray experiments. 
When applied to yeast ChIP-chip data, our method reveals that only 48\% of the data sets can be readily explained by direct binding of the profiled TF, while 16\% can be explained by indirect DNA binding. In the remaining 36\%, none of the motifs used in our analysis was able to explain the ChIP-chip data, either because the data were too noisy or because the set of motifs was incomplete. As more in vitro TF DNA binding motifs become available, our method could be used to build a complete catalog of direct and indirect TF–DNA interactions. Our method is not restricted to yeast or to ChIP-chip data, but can be applied in any system for which both in vivo binding data and in vitro DNA binding motifs are available.}, language = {en}, number = {11}, urldate = {2016-07-11}, journal = {Genome Research}, author = {Gordân, Raluca and Hartemink, Alexander J. and Bulyk, Martha L.}, month = nov, year = {2009}, pmid = {19652015}, pages = {2090--2100}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/G35V4FIF/Gordân et al. - 2009 - Distinguishing direct versus indirect transcriptio.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Q4K9GBD5/2090.html:text/html} } @article{badis_diversity_2009, title = {Diversity and {Complexity} in {DNA} {Recognition} by {Transcription} {Factors}}, volume = {324}, copyright = {Copyright © 2009, American Association for the Advancement of Science}, issn = {0036-8075, 1095-9203}, url = {http://science.sciencemag.org/content/324/5935/1720}, doi = {10.1126/science.1162327}, abstract = {Transcriptional Regulation Gets More Complicated Sequence preferences of DNA binding proteins are a primary mechanism by which cells interpret the genome. A central goal in genome biology is to identify regulatory sequences in the genome; however, few proteins' DNA binding specificities have been characterized comprehensively. Badis et al. (p. 
1720, published online 14 May) studied 104 known and predicted transcription factors (TFs), spanning 22 structural classes, in the mouse genome. While traditional models of TF binding sites are based on a single collection of highly similar DNA sequences, binding profiles were represented better by multiple motifs. Roughly half of the TFs recognized distinct primary and secondary motifs that are different from each other. At least some of these interaction modes appeared to be attributable to biophysically distinct protein conformations, adding to the complexity of transcriptional regulation. Sequence preferences of DNA binding proteins are a primary mechanism by which cells interpret the genome. Despite the central importance of these proteins in physiology, development, and evolution, comprehensive DNA binding specificities have been determined experimentally for only a few proteins. Here, we used microarrays containing all 10–base pair sequences to examine the binding specificities of 104 distinct mouse DNA binding proteins representing 22 structural classes. Our results reveal a complex landscape of binding, with virtually every protein analyzed possessing unique preferences. Roughly half of the proteins each recognized multiple distinctly different sequence motifs, challenging our molecular understanding of how proteins interact with their DNA binding sites. This complexity in DNA recognition may be important in gene regulation and in the evolution of transcriptional regulatory networks. A broad survey of transcription factors reveals that related proteins can have multiple and differing DNA binding specificities. A broad survey of transcription factors reveals that related proteins can have multiple and differing DNA binding specificities.}, language = {en}, number = {5935}, urldate = {2016-07-11}, journal = {Science}, author = {Badis, Gwenael and Berger, Michael F. and Philippakis, Anthony A. and Talukder, Shaheynoor and Gehrke, Andrew R. 
and Jaeger, Savina A. and Chan, Esther T. and Metzler, Genita and Vedenko, Anastasia and Chen, Xiaoyu and Kuznetsov, Hanna and Wang, Chi-Fong and Coburn, David and Newburger, Daniel E. and Morris, Quaid and Hughes, Timothy R. and Bulyk, Martha L.}, month = jun, year = {2009}, pmid = {19443739}, pages = {1720--1723}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HZKW3MWF/Badis et al. - 2009 - Diversity and Complexity in DNA Recognition by Tra.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/EEUGZASQ/1720.html:text/html} } @article{carlson_specificity_2010, title = {Specificity landscapes of {DNA} binding molecules elucidate biological function}, volume = {107}, issn = {0027-8424, 1091-6490}, url = {http://www.pnas.org/content/107/10/4544}, doi = {10.1073/pnas.0914023107}, abstract = {Evaluating the specificity spectra of DNA binding molecules is a nontrivial challenge that hinders the ability to decipher gene regulatory networks or engineer molecules that act on genomes. Here we compare the DNA sequence specificities for different classes of proteins and engineered DNA binding molecules across the entire sequence space. These high-content data are visualized and interpreted using an interactive “specificity landscape” which simultaneously displays the affinity and specificity of a million-plus DNA sequences. Contrary to expectation, specificity landscapes reveal that synthetic DNA ligands match, and often surpass, the specificities of eukaryotic DNA binding proteins. The landscapes also identify differential specificity constraints imposed by diverse structural folds of natural and synthetic DNA binders. Importantly, the sequence context of a binding site significantly influences binding energetics, and utilizing the full contextual information permits greater accuracy in annotating regulatory elements within a given genome. 
Assigning such context-dependent binding values to every DNA sequence across the genome yields predictive genome-wide binding landscapes (genomescapes). A genomescape of a synthetic DNA binding molecule provided insight into its differential regulatory activity in cultured cells. The approach we describe will accelerate the creation of precision-tailored DNA therapeutics and uncover principles that govern sequence-specificity of DNA binding molecules.}, language = {en}, number = {10}, urldate = {2016-07-11}, journal = {Proceedings of the National Academy of Sciences}, author = {Carlson, Clayton D. and Warren, Christopher L. and Hauschild, Karl E. and Ozers, Mary S. and Qadir, Naveeda and Bhimsaria, Devesh and Lee, Youngsook and Cerrina, Franco and Ansari, Aseem Z.}, month = mar, year = {2010}, pmid = {20176964}, keywords = {chemical genomics, Cognate Site Identification, DNA binders, genomescapes, Energy Landscapes}, pages = {4544--4549}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3Z8DU24S/Carlson et al. - 2010 - Specificity landscapes of DNA binding molecules el.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XKEEFARQ/4544.html:text/html} } @article{benos_additivity_2002, title = {Additivity in protein–{DNA} interactions: how good an approximation is it?}, volume = {30}, issn = {0305-1048, 1362-4962}, shorttitle = {Additivity in protein–{DNA} interactions}, url = {http://nar.oxfordjournals.org/content/30/20/4442}, doi = {10.1093/nar/gkf578}, abstract = {Man and Stormo and Bulyk et al. recently presented their results on the study of the DNA binding affinity of proteins. In both of these studies the main conclusion is that the additivity assumption, usually applied in methods to search for binding sites, is not true. 
In the first study, the analysis of binding affinity data from the Mnt repressor protein bound to all possible DNA (sub)targets at positions 16 and 17 of the binding site, showed that those positions are not independent. In the second study, the authors analysed DNA binding affinity data of the wild‐type mouse EGR1 protein and four variants differing on the middle finger. The binding affinity of these proteins was measured to all 64 possible trinucleotide (sub)targets of the middle finger using microarray technology. The analysis of the measurements also showed interdependence among the positions in the DNA target. In the present report, we review the data of both studies and we re‐analyse them using various statistical methods, including a comparison with a multiple regression approach. We conclude that despite the fact that the additivity assumption does not fit the data perfectly, in most cases it provides a very good approximation of the true nature of the specific protein–DNA interactions. Therefore, additive models can be very useful for the discovery and prediction of binding sites in genomic DNA.}, language = {en}, number = {20}, urldate = {2016-07-11}, journal = {Nucleic Acids Research}, author = {Benos, Panayiotis V. and Bulyk, Martha L. and Stormo, Gary D.}, month = oct, year = {2002}, pmid = {12384591}, pages = {4442--4451}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9NNEN6A3/Benos et al.
- 2002 - Additivity in protein–DNA interactions how good a.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/D8CV39DE/4442.html:text/html} } @article{bulyk_nucleotides_2002, title = {Nucleotides of transcription factor binding sites exert interdependent effects on the binding affinities of transcription factors}, volume = {30}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/30/5/1255}, doi = {10.1093/nar/30.5.1255}, abstract = {We can determine the effects of many possible sequence variations in transcription factor binding sites using microarray binding experiments. Analysis of wild-type and mutant Zif268 (Egr1) zinc fingers bound to microarrays containing all possible central 3 bp triplet binding sites indicates that the nucleotides of transcription factor binding sites cannot be treated independently. This indicates that the current practice of characterizing transcription factor binding sites by mutating individual positions of binding sites one base pair at a time does not provide a true picture of the sequence specificity. Similarly, current bioinformatic practices using either just a consensus sequence, or even mononucleotide frequency weight matrices to provide more complete descriptions of transcription factor binding sites, are not accurate in depicting the true binding site specificities, since these methods rely upon the assumption that the nucleotides of binding sites exert independent effects on binding affinity. Our results stress the importance of complete reference tables of all possible binding sites for comparing protein binding preferences for various DNA sequences. 
We also show results suggesting that microarray binding data using particular subsets of all possible binding sites can be used to extrapolate the relative binding affinities of all possible full-length binding sites, given a known binding site for use as a starting sequence for site preference refinement.}, language = {en}, number = {5}, urldate = {2016-07-12}, journal = {Nucleic Acids Research}, author = {Bulyk, Martha L. and Johnson, Philip L. F. and Church, George M.}, month = mar, year = {2002}, pmid = {11861919}, pages = {1255--1261}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GW392C6E/Bulyk et al. - 2002 - Nucleotides of transcription factor binding sites .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/553SQDSR/1255.html:text/html} } @article{gershenzon_computational_2005, title = {Computational technique for improvement of the position-weight matrices for the {DNA}/protein binding sites}, volume = {33}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/33/7/2290}, doi = {10.1093/nar/gki519}, abstract = {Position-weight matrices (PWMs) are broadly used to locate transcription factor binding sites in DNA sequences. The majority of existing PWMs provide a low level of both sensitivity and specificity. We present a new computational algorithm, a modification of the Staden–Bucher approach, that improves the PWM. We applied the proposed technique on the PWM of the GC-box, binding site for Sp1. The comparison of old and new PWMs shows that the latter increase both sensitivity and specificity. The statistical parameters of GC-box distribution in promoter regions and in the human genome, as well as in each chromosome, are presented. The majority of commonly used PWMs are the 4-row mononucleotide matrices, although 16-row dinucleotide matrices are known to be more informative. 
The algorithm efficiently determines the 16-row matrices and preliminary results show that such matrices provide better results than 4-row matrices.}, language = {en}, number = {7}, urldate = {2016-07-12}, journal = {Nucleic Acids Research}, author = {Gershenzon, Naum I. and Stormo, Gary D. and Ioshikhes, Ilya P.}, month = jan, year = {2005}, pmid = {15849315}, pages = {2290--2301}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FJ85FE73/Gershenzon et al. - 2005 - Computational technique for improvement of the pos.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3GJ3CQ3F/2290.html:text/html} } @article{djordjevic_biophysical_2003, title = {A {Biophysical} {Approach} to {Transcription} {Factor} {Binding} {Site} {Discovery}}, volume = {13}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/13/11/2381}, doi = {10.1101/gr.1271603}, abstract = {Identification of transcription factor binding sites within regulatory segments of genomic DNA is an important step toward understanding of the regulatory circuits that control expression of genes. Here, we describe a novel bioinformatics method that bases classification of potential binding sites explicitly on the estimate of sequence-specific binding energy of a given transcription factor. The method also estimates the chemical potential of the factor that defines the threshold of binding. In contrast with the widely used information-theoretic weight matrix method, the new approach correctly describes saturation in the transcription factor/DNA binding probability. This results in a significant improvement in the number of expected false positives, particularly in the ubiquitous case of low-specificity factors. In the strong binding limit, the algorithm is related to the “support vector machine” approach to pattern recognition. The new method is used to identify likely genomic binding sites for the E. 
coli transcription factors collected in the DPInteract database. In addition, for CRP (a global regulatory factor), the likely regulatory modality (i.e., repressor or activator) of predicted binding sites is determined.}, language = {en}, number = {11}, urldate = {2016-08-03}, journal = {Genome Research}, author = {Djordjevic, Marko and Sengupta, Anirvan M. and Shraiman, Boris I.}, month = nov, year = {2003}, pmid = {14597652}, pages = {2381--2390}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UT9VDSVF/Djordjevic et al. - 2003 - A Biophysical Approach to Transcription Factor Bin.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/J8XU8D44/2381.html:text/html} } @article{liu_whole-genome_2006, title = {Whole-genome comparison of {Leu}3 binding in vitro and in vivo reveals the importance of nucleosome occupancy in target site selection}, volume = {16}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/16/12/1517}, doi = {10.1101/gr.5655606}, abstract = {Sequence motifs that are potentially recognized by DNA-binding proteins occur far more often in genomic DNA than do observed in vivo protein–DNA interactions. To determine how chromatin influences the utilization of particular DNA-binding sites, we compared the in vivo genome-wide binding location of the yeast transcription factor Leu3 to the binding location observed on the same genomic DNA in the absence of any protein cofactors. We found that the DNA-sequence motif recognized by Leu3 in vitro and in vivo was functionally indistinguishable, but Leu3 bound different genomic locations under the two conditions. Accounting for nucleosome occupancy in addition to DNA-sequence motifs significantly improved the prediction of protein–DNA interactions in vivo, but not the prediction of sites bound by purified Leu3 in vitro. 
Use of histone modification data does not further improve binding predictions, presumably because their effect is already manifest in the global histone distribution. Measurements of nucleosome occupancy in strains that differ in Leu3 genotype show that low nucleosome occupancy at loci bound by Leu3 is not a consequence of Leu3 binding. These results permit quantitation of the epigenetic influence that chromatin exerts on DNA binding-site selection, and provide evidence for an instructive, functionally important role for nucleosome occupancy in determining patterns of regulatory factor targeting genome-wide.}, language = {en}, number = {12}, urldate = {2016-08-08}, journal = {Genome Research}, author = {Liu, Xiao and Lee, Cheol-Koo and Granek, Joshua A. and Clarke, Neil D. and Lieb, Jason D.}, month = dec, year = {2006}, pmid = {17053089}, pages = {1517--1528}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GA2KFRND/Liu et al. - 2006 - Whole-genome comparison of Leu3 binding in vitro a.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/P9UXRF68/1517.html:text/html} } @article{zhu_high-resolution_2009, title = {High-resolution {DNA}-binding specificity analysis of yeast transcription factors}, volume = {19}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/19/4/556}, doi = {10.1101/gr.090233.108}, abstract = {Transcription factors (TFs) regulate the expression of genes through sequence-specific interactions with DNA-binding sites. However, despite recent progress in identifying in vivo TF binding sites by microarray readout of chromatin immunoprecipitation (ChIP-chip), nearly half of all known yeast TFs are of unknown DNA-binding specificities, and many additional predicted TFs remain uncharacterized. 
To address these gaps in our knowledge of yeast TFs and their cis regulatory sequences, we have determined high-resolution binding profiles for 89 known and predicted yeast TFs, over more than 2.3 million gapped and ungapped 8-bp sequences (“k-mers”). We report 50 new or significantly different direct DNA-binding site motifs for yeast DNA-binding proteins and motifs for eight proteins for which only a consensus sequence was previously known; in total, this corresponds to over a 50\% increase in the number of yeast DNA-binding proteins with experimentally determined DNA-binding specificities. Among other novel regulators, we discovered proteins that bind the PAC (Polymerase A and C) motif (GATGAG) and regulate ribosomal RNA (rRNA) transcription and processing, core cellular processes that are constituent to ribosome biogenesis. In contrast to earlier data types, these comprehensive k-mer binding data permit us to consider the regulatory potential of genomic sequence at the individual word level. These k-mer data allowed us to reannotate in vivo TF binding targets as direct or indirect and to examine TFs' potential effects on gene expression in ∼1700 environmental and cellular conditions. These approaches could be adapted to identify TFs and cis regulatory elements in higher eukaryotes.}, language = {en}, number = {4}, urldate = {2016-08-08}, journal = {Genome Research}, author = {Zhu, Cong and Byers, Kelsey J. R. P. and McCord, Rachel Patton and Shi, Zhenwei and Berger, Michael F. and Newburger, Daniel E. and Saulrieta, Katrina and Smith, Zachary and Shah, Mita V. and Radhakrishnan, Mathangi and Philippakis, Anthony A. and Hu, Yanhui and De Masi, Federico and Pacek, Marcin and Rolfs, Andreas and Murthy, Tal and LaBaer, Joshua and Bulyk, Martha L.}, month = apr, year = {2009}, pmid = {19158363}, pages = {556--566}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/DFQVE56N/Zhu et al.
- 2009 - High-resolution DNA-binding specificity analysis o.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BIRIECRR/556.html:text/html} } @article{weirauch_evaluation_2013, title = {Evaluation of methods for modeling transcription factor sequence specificity}, volume = {31}, copyright = {© 2013 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1087-0156}, url = {http://www.nature.com/nbt/journal/v31/n2/abs/nbt.2486.html}, doi = {10.1038/nbt.2486}, abstract = {Genomic analyses often involve scanning for potential transcription factor (TF) binding sites using models of the sequence specificity of DNA binding proteins. Many approaches have been developed to model and learn a protein's DNA-binding specificity, but these methods have not been systematically compared. Here we applied 26 such approaches to in vitro protein binding microarray data for 66 mouse TFs belonging to various families. For nine TFs, we also scored the resulting motif models on in vivo data, and found that the best in vitro–derived motifs performed similarly to motifs derived from the in vivo data. Our results indicate that simple models based on mononucleotide position weight matrices trained by the best methods perform similarly to more complex models for most TFs examined, but fall short in specific cases ({\textless}10\% of the TFs examined here). In addition, the best-performing motifs typically have relatively low information content, consistent with widespread degeneracy in eukaryotic TF sequence preferences. View full text}, language = {en}, number = {2}, urldate = {2016-08-08}, journal = {Nature Biotechnology}, author = {Weirauch, Matthew T. and Cote, Atina and Norel, Raquel and Annala, Matti and Zhao, Yue and Riley, Todd R. and Saez-Rodriguez, Julio and Cokelaer, Thomas and Vedenko, Anastasia and Talukder, Shaheynoor and {Dream5 Consortium} and Bussemaker, Harmen J. and Morris, Quaid D. 
and Bulyk, Martha L. and Stolovitzky, Gustavo and Hughes, Timothy R.}, month = feb, year = {2013}, keywords = {Transcription Factors, functional genomics, Computational biology and bioinformatics}, pages = {126--134}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QINBEMQC/Weirauch et al. - 2013 - Evaluation of methods for modeling transcription f.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ANR3WQZ2/nbt.2486.html:text/html} } @article{zhao_quantitative_2011, title = {Quantitative analysis demonstrates most transcription factors require only simple models of specificity}, volume = {29}, copyright = {© 2011 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1087-0156}, url = {http://www.nature.com/nbt/journal/v29/n6/full/nbt.1893.html}, doi = {10.1038/nbt.1893}, language = {en}, number = {6}, urldate = {2016-08-11}, journal = {Nature Biotechnology}, author = {Zhao, Yue and Stormo, Gary D.}, month = jun, year = {2011}, keywords = {Transcription Factors, functional genomics, Computational biology and bioinformatics}, pages = {480--483}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3874FVNA/Zhao et Stormo - 2011 - Quantitative analysis demonstrates most transcript.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XBGB842T/nbt.1893.html:text/html} } @article{ha_cops:_2012, title = {{COPS}: {Detecting} {Co}-{Occurrence} and {Spatial} {Arrangement} of {Transcription} {Factor} {Binding} {Motifs} in {Genome}-{Wide} {Datasets}}, volume = {7}, issn = {1932-6203}, shorttitle = {{COPS}}, url = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0052055}, doi = {10.1371/journal.pone.0052055}, abstract = {In multi-cellular organisms, spatiotemporal activity of cis -regulatory DNA elements depends on their occupancy by different transcription factors (TFs). 
In recent years, genome-wide ChIP-on-Chip, ChIP-Seq and DamID assays have been extensively used to unravel the combinatorial interaction of TFs with cis-regulatory modules (CRMs) in the genome. Even though genome-wide binding profiles are increasingly becoming available for different TFs, single TF binding profiles are in most cases not sufficient for dissecting complex regulatory networks. Thus, potent computational tools detecting statistically significant and biologically relevant TF-motif co-occurrences in genome-wide datasets are essential for analyzing context-dependent transcriptional regulation. We have developed COPS (Co-Occurrence Pattern Search), a new bioinformatics tool based on a combination of association rules and Markov chain models, which detects co-occurring TF binding sites (BSs) on genomic regions of interest. COPS scans DNA sequences for frequent motif patterns using a Frequent-Pattern tree based data mining approach, which allows efficient performance of the software with respect to both data structure and implementation speed, in particular when mining large datasets. Since transcriptional gene regulation very often relies on the formation of regulatory protein complexes mediated by closely adjoining TF binding sites on CRMs, COPS additionally detects preferred short distance between co-occurring TF motifs. The performance of our software with respect to biological significance was evaluated using three published datasets containing genomic regions that are independently bound by several TFs involved in a defined biological process.
In sum, COPS is a fast, efficient and user-friendly tool mining statistically and biologically significant TFBS co-occurrences and therefore allows the identification of TFs that combinatorially regulate gene expression.}, number = {12}, urldate = {2016-08-15}, journal = {PLOS ONE}, author = {Ha, Nati and Polychronidou, Maria and Lohmann, Ingrid}, year = {2012}, keywords = {Gene regulation, Sequence motif analysis, Invertebrate genomics, Genome analysis, Mammalian genomics, Drosophila melanogaster, Genomic databases, Mesoderm}, pages = {e52055}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/T3FVC4RM/Ha et al. - 2012 - COPS Detecting Co-Occurrence and Spatial Arrangem.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/DCMZ65QU/article.html:text/html} } @article{touzet_efficient_2007, title = {Efficient and accurate {P}-value computation for {Position} {Weight} {Matrices}}, volume = {2}, issn = {1748-7188}, url = {http://dx.doi.org/10.1186/1748-7188-2-15}, doi = {10.1186/1748-7188-2-15}, abstract = {Position Weight Matrices (PWMs) are probabilistic representations of signals in sequences. They are widely used to model approximate patterns in DNA or in protein sequences. The usage of PWMs needs as a prerequisite to knowing the statistical significance of a word according to its score. This is done by defining the P-value of a score, which is the probability that the background model can achieve a score larger than or equal to the observed value. This gives rise to the following problem: Given a P-value, find the corresponding score threshold. Existing methods rely on dynamic programming or probability generating functions. 
For many examples of PWMs, they fail to give accurate results in a reasonable amount of time.}, urldate = {2016-08-16}, journal = {Algorithms for Molecular Biology}, author = {Touzet, Hélène and Varré, Jean-Stéphane}, year = {2007}, pages = {15}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/25QZE893/Touzet et Varré - 2007 - Efficient and accurate P-value computation for Pos.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ENEM24D3/1748-7188-2-15.html:text/html} } @article{loo_computational_2009, title = {Computational methods for the detection of cis-regulatory modules}, volume = {10}, issn = {1467-5463, 1477-4054}, url = {http://bib.oxfordjournals.org/content/10/5/509}, doi = {10.1093/bib/bbp025}, abstract = {Metazoan transcription regulation occurs through the concerted action of multiple transcription factors that bind co-operatively to cis-regulatory modules (CRMs). The annotation of these key regulators of transcription is lagging far behind the annotation of the transcriptome itself. Here, we give an overview of existing computational methods to detect these CRMs in metazoan genomes. We subdivide these methods into three classes: CRM scanners screen sequences for CRMs based on predefined models that often consist of multiple position weight matrices (PWMs). CRM builders construct models of similar CRMs controlling a set of co-regulated or co-expressed genes. CRM genome screeners screen sequences or complete genomes for CRMs as homotypic or heterotypic clusters of binding sites for any combination of transcription factors. We believe that CRM scanners are currently the most advanced methods, although their applicability is limited. 
Finally, we argue that CRM builders that make use of PWM libraries will benefit greatly from future advances and will prove to be most instrumental for the annotation of regulatory regions in metazoan genomes.}, language = {en}, number = {5}, urldate = {2016-08-16}, journal = {Briefings in Bioinformatics}, author = {Van Loo, Peter and Marynen, Peter}, month = sep, year = {2009}, pmid = {19498042}, keywords = {transcription regulation, cis-regulatory modules, genome annotation, regulatory regions, computational CRM detection}, pages = {509--524}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/U5ZEZPMH/Loo et Marynen - 2009 - Computational methods for the detection of cis-reg.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RXMCQQB8/509.html:text/html} } @article{yip_classification_2012, title = {Classification of human genomic regions based on experimentally determined binding sites of more than 100 transcription-related factors}, volume = {13}, issn = {1474-760X}, url = {http://dx.doi.org/10.1186/gb-2012-13-9-r48}, doi = {10.1186/gb-2012-13-9-r48}, abstract = {Transcription factors function by binding different classes of regulatory elements. The Encyclopedia of DNA Elements (ENCODE) project has recently produced binding data for more than 100 transcription factors from about 500 ChIP-seq experiments in multiple cell types. While this large amount of data creates a valuable resource, it is nonetheless overwhelmingly complex and simultaneously incomplete since it covers only a small fraction of all human transcription factors.}, urldate = {2016-08-17}, journal = {Genome Biology}, author = {Yip, Kevin Y. and Cheng, Chao and Bhardwaj, Nitin and Brown, James B.
and Leng, Jing and Kundaje, Anshul and Rozowsky, Joel and Birney, Ewan and Bickel, Peter and Snyder, Michael and Gerstein, Mark}, year = {2012}, pages = {R48}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HCR3FP2I/Yip et al. - 2012 - Classification of human genomic regions based on e.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Z7ZTJIFA/gb-2012-13-9-r48.html:text/html} } @article{natarajan_predicting_2012, title = {Predicting cell-type–specific gene expression from regions of open chromatin}, volume = {22}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/22/9/1711}, doi = {10.1101/gr.135129.111}, abstract = {Complex patterns of cell-type–specific gene expression are thought to be achieved by combinatorial binding of transcription factors (TFs) to sequence elements in regulatory regions. Predicting cell-type–specific expression in mammals has been hindered by the oftentimes unknown location of distal regulatory regions. To alleviate this bottleneck, we used DNase-seq data from 19 diverse human cell types to identify proximal and distal regulatory elements at genome-wide scale. Matched expression data allowed us to separate genes into classes of cell-type–specific up-regulated, down-regulated, and constitutively expressed genes. CG dinucleotide content and DNA accessibility in the promoters of these three classes of genes displayed substantial differences, highlighting the importance of including these aspects in modeling gene expression. We associated DNase I hypersensitive sites (DHSs) with genes, and trained classifiers for different expression patterns. TF sequence motif matches in DHSs provided a strong performance improvement in predicting gene expression over the typical baseline approach of using proximal promoter sequences. 
In particular, we achieved competitive performance when discriminating up-regulated genes from different cell types or genes up- and down-regulated under the same conditions. We identified previously known and new candidate cell-type–specific regulators. The models generated testable predictions of activating or repressive functions of regulators. DNase I footprints for these regulators were indicative of their direct binding to DNA. In summary, we successfully used information of open chromatin obtained by a single assay, DNase-seq, to address the problem of predicting cell-type–specific gene expression in mammalian organisms directly from regulatory sequence.}, language = {en}, number = {9}, urldate = {2016-08-17}, journal = {Genome Research}, author = {Natarajan, Anirudh and Yardımcı, Galip Gürkan and Sheffield, Nathan C. and Crawford, Gregory E. and Ohler, Uwe}, month = sep, year = {2012}, pmid = {22955983}, pages = {1711--1722}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KQRSH46X/Natarajan et al. - 2012 - Predicting cell-type–specific gene expression from.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IVRNI5D9/1711.html:text/html} } @article{neph_expansive_2012, title = {An expansive human regulatory lexicon encoded in transcription factor footprints}, volume = {489}, copyright = {© 2012 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v489/n7414/full/nature11212.html}, doi = {10.1038/nature11212}, abstract = {Regulatory factor binding to genomic DNA protects the underlying sequence from cleavage by DNase I, leaving nucleotide-resolution footprints. 
Using genomic DNase I footprinting across 41 diverse cell and tissue types, we detected 45 million transcription factor occupancy events within regulatory regions, representing differential binding to 8.4 million distinct short sequence elements. Here we show that this small genomic sequence compartment, roughly twice the size of the exome, encodes an expansive repertoire of conserved recognition sequences for DNA-binding proteins that nearly doubles the size of the human cis–regulatory lexicon. We find that genetic variants affecting allelic chromatin states are concentrated in footprints, and that these elements are preferentially sheltered from DNA methylation. High-resolution DNase I cleavage patterns mirror nucleotide-level evolutionary conservation and track the crystallographic topography of protein–DNA interfaces, indicating that transcription factor structure has been evolutionarily imprinted on the human genome sequence. We identify a stereotyped 50-base-pair footprint that precisely defines the site of transcript origination within thousands of human promoters. Finally, we describe a large collection of novel regulatory factor recognition motifs that are highly conserved in both sequence and function, and exhibit cell-selective occupancy patterns that closely parallel major regulators of development, differentiation and pluripotency.}, language = {en}, number = {7414}, urldate = {2016-08-17}, journal = {Nature}, author = {Neph, Shane and Vierstra, Jeff and Stergachis, Andrew B. and Reynolds, Alex P. and Haugen, Eric and Vernot, Benjamin and Thurman, Robert E. and John, Sam and Sandstrom, Richard and Johnson, Audra K. and Maurano, Matthew T. and Humbert, Richard and Rynes, Eric and Wang, Hao and Vong, Shinny and Lee, Kristen and Bates, Daniel and Diegel, Morgan and Roach, Vaughn and Dunn, Douglas and Neri, Jun and Schafer, Anthony and Hansen, R. 
Scott and Kutyavin, Tanya and Giste, Erika and Weaver, Molly and Canfield, Theresa and Sabo, Peter and Zhang, Miaohua and Balasundaram, Gayathri and Byron, Rachel and MacCoss, Michael J. and Akey, Joshua M. and Bender, M. A. and Groudine, Mark and Kaul, Rajinder and Stamatoyannopoulos, John A.}, month = sep, year = {2012}, keywords = {evolution, genetics, genomics, molecular biology}, pages = {83--90}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/M3T8WI2W/Neph et al. - 2012 - An expansive human regulatory lexicon encoded in t.pdf:application/pdf;Neph et al.-2012 - supplemental.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/M3T8WI2W/Neph et al.-2012 - supplemental.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MCX576A7/nature11212.html:text/html} } @article{gerstein_architecture_2012, title = {Architecture of the human regulatory network derived from {ENCODE} data}, volume = {489}, copyright = {© 2012 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v489/n7414/full/nature11245.html}, doi = {10.1038/nature11245}, abstract = {Transcription factors bind in a combinatorial fashion to specify the on-and-off states of genes; the ensemble of these binding events forms a regulatory network, constituting the wiring diagram for a cell. To examine the principles of the human transcriptional regulatory network, we determined the genomic binding information of 119 transcription-related factors in over 450 distinct experiments. We found the combinatorial, co-association of transcription factors to be highly context specific: distinct combinations of factors bind at specific genomic locations. In particular, there are significant differences in the binding proximal and distal to genes. 
We organized all the transcription factor binding into a hierarchy and integrated it with other genomic information (for example, microRNA regulation), forming a dense meta-network. Factors at different levels have different properties; for instance, top-level transcription factors more strongly influence expression and middle-level ones co-regulate targets to mitigate information-flow bottlenecks. Moreover, these co-regulations give rise to many enriched network motifs (for example, noise-buffering feed-forward loops). Finally, more connected network components are under stronger selection and exhibit a greater degree of allele-specific activity (that is, differential binding to the two parental alleles). The regulatory information obtained in this study will be crucial for interpreting personal genome sequences and understanding basic principles of human biology and disease.}, language = {en}, number = {7414}, urldate = {2016-08-17}, journal = {Nature}, author = {Gerstein, Mark B. and Kundaje, Anshul and Hariharan, Manoj and Landt, Stephen G. and Yan, Koon-Kiu and Cheng, Chao and Mu, Xinmeng Jasmine and Khurana, Ekta and Rozowsky, Joel and Alexander, Roger and Min, Renqiang and Alves, Pedro and Abyzov, Alexej and Addleman, Nick and Bhardwaj, Nitin and Boyle, Alan P. and Cayting, Philip and Charos, Alexandra and Chen, David Z. and Cheng, Yong and Clarke, Declan and Eastman, Catharine and Euskirchen, Ghia and Frietze, Seth and Fu, Yao and Gertz, Jason and Grubert, Fabian and Harmanci, Arif and Jain, Preti and Kasowski, Maya and Lacroute, Phil and Leng, Jing and Lian, Jin and Monahan, Hannah and O’Geen, Henriette and Ouyang, Zhengqing and Partridge, E. Christopher and Patacsil, Dorrelyn and Pauli, Florencia and Raha, Debasish and Ramirez, Lucia and Reddy, Timothy E. and Reed, Brian and Shi, Minyi and Slifer, Teri and Wang, Jing and Wu, Linfeng and Yang, Xinqiong and Yip, Kevin Y. 
and Zilberman-Schapira, Gili and Batzoglou, Serafim and Sidow, Arend and Farnham, Peggy J. and Myers, Richard M. and Weissman, Sherman M. and Snyder, Michael}, month = sep, year = {2012}, keywords = {genetics, genomics}, pages = {91--100}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/47UG7U49/Gerstein et al. - 2012 - Architecture of the human regulatory network deriv.pdf:application/pdf;Gerstein et al. - 2012 - supplemental.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/47UG7U49/Gerstein et al. - 2012 - supplemental.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9SNJKB5X/nature11245.html:text/html} } @article{whitfield_functional_2012, title = {Functional analysis of transcription factor binding sites in human promoters}, volume = {13}, issn = {1474-760X}, url = {http://dx.doi.org/10.1186/gb-2012-13-9-r50}, doi = {10.1186/gb-2012-13-9-r50}, abstract = {The binding of transcription factors to specific locations in the genome is integral to the orchestration of transcriptional regulation in cells. To characterize transcription factor binding site function on a large scale, we predicted and mutagenized 455 binding sites in human promoters. We carried out functional tests on these sites in four different immortalized human cell lines using transient transfections with a luciferase reporter assay, primarily for the transcription factors CTCF, GABP, GATA2, E2F, STAT, and YY1.}, urldate = {2016-08-17}, journal = {Genome Biology}, author = {Whitfield, Troy W. and Wang, Jie and Collins, Patrick J. and Partridge, E. Christopher and Aldred, Shelley Force and Trinklein, Nathan D. and Myers, Richard M. and Weng, Zhiping}, year = {2012}, pages = {R50}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IPRTU6H6/Whitfield et al. 
- 2012 - Functional analysis of transcription factor bindin.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/AGSRGNM2/gb-2012-13-9-r50.html:text/html} } @article{heinz_simple_2010, title = {Simple {Combinations} of {Lineage}-{Determining} {Transcription} {Factors} {Prime} cis-{Regulatory} {Elements} {Required} for {Macrophage} and {B} {Cell} {Identities}}, volume = {38}, issn = {1097-2765}, url = {http://www.sciencedirect.com/science/article/pii/S1097276510003667}, doi = {10.1016/j.molcel.2010.05.004}, abstract = {Summary Genome-scale studies have revealed extensive, cell type-specific colocalization of transcription factors, but the mechanisms underlying this phenomenon remain poorly understood. Here, we demonstrate in macrophages and B cells that collaborative interactions of the common factor PU.1 with small sets of macrophage- or B cell lineage-determining transcription factors establish cell-specific binding sites that are associated with the majority of promoter-distal H3K4me1-marked genomic regions. PU.1 binding initiates nucleosome remodeling, followed by H3K4 monomethylation at large numbers of genomic regions associated with both broadly and specifically expressed genes. These locations serve as beacons for additional factors, exemplified by liver X receptors, which drive both cell-specific gene expression and signal-dependent responses. Together with analyses of transcription factor binding and H3K4me1 patterns in other cell types, these studies suggest that simple combinations of lineage-determining transcription factors can specify the genomic sites ultimately responsible for both cell identity and cell type-specific responses to diverse signaling inputs.}, number = {4}, urldate = {2016-08-23}, journal = {Molecular Cell}, author = {Heinz, Sven and Benner, Christopher and Spann, Nathanael and Bertolino, Eric and Lin, Yin C. and Laslo, Peter and Cheng, Jason X. 
and Murre, Cornelis and Singh, Harinder and Glass, Christopher K.}, month = may, year = {2010}, keywords = {DNA, MOLIMMUNO}, pages = {576--589}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/CPTT6622/Heinz et al. - 2010 - Simple Combinations of Lineage-Determining Transcr.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RHRR6DM6/S1097276510003667.html:text/html} } @article{perner_inference_2014, title = {Inference of interactions between chromatin modifiers and histone modifications: from {ChIP}-{Seq} data to chromatin-signaling}, volume = {42}, issn = {0305-1048, 1362-4962}, shorttitle = {Inference of interactions between chromatin modifiers and histone modifications}, url = {http://nar.oxfordjournals.org/content/42/22/13689}, doi = {10.1093/nar/gku1234}, abstract = {Chromatin modifiers and histone modifications are components of a chromatin-signaling network involved in transcription and its regulation. The interactions between chromatin modifiers and histone modifications are often unknown, are based on the analysis of few genes or are studied in vitro. Here, we apply computational methods to recover interactions between chromatin modifiers and histone modifications from genome-wide ChIP-Seq data. These interactions provide a high-confidence backbone of the chromatin-signaling network. Many recovered interactions have literature support; others provide hypotheses about yet unknown interactions. We experimentally verified two of these predicted interactions, leading to a link between H4K20me1 and members of the Polycomb Repressive Complexes 1 and 2. 
Our results suggest that our computationally derived interactions are likely to lead to novel biological insights required to establish the connectivity of the chromatin-signaling network involved in transcription and its regulation.}, language = {en}, number = {22}, urldate = {2016-09-27}, journal = {Nucleic Acids Research}, author = {Perner, Juliane and Lasserre, Julia and Kinkley, Sarah and Vingron, Martin and Chung, Ho-Ryun}, month = dec, year = {2014}, pmid = {25414326}, pages = {13689--13695}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/X5PX2TZR/Perner et al. - 2014 - Inference of interactions between chromatin modifi.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/EFRWRTEU/13689.html:text/html} } @article{mukherjee_rapid_2004, title = {Rapid analysis of the {DNA}-binding specificities of transcription factors with {DNA} microarrays}, volume = {36}, copyright = {© 2004 Nature Publishing Group}, issn = {1061-4036}, url = {http://www.nature.com/ng/journal/v36/n12/full/ng1473.html}, doi = {10.1038/ng1473}, abstract = {We developed a new DNA microarray-based technology, called protein binding microarrays (PBMs), that allows rapid, high-throughput characterization of the in vitro DNA binding–site sequence specificities of transcription factors in a single day. Using PBMs, we identified the DNA binding–site sequence specificities of the yeast transcription factors Abf1, Rap1 and Mig1. Comparison of these proteins' in vitro binding sites with their in vivo binding sites indicates that PBM-derived sequence specificities can accurately reflect in vivo DNA sequence specificities. In addition to previously identified targets, Abf1, Rap1 and Mig1 bound to 107, 90 and 75 putative new target intergenic regions, respectively, many of which were upstream of previously uncharacterized open reading frames. 
Comparative sequence analysis indicated that many of these newly identified sites are highly conserved across five sequenced sensu stricto yeast species and, therefore, are probably functional in vivo binding sites that may be used in a condition-specific manner. Similar PBM experiments should be useful in identifying new cis regulatory elements and transcriptional regulatory networks in various genomes.}, language = {en}, number = {12}, urldate = {2016-08-23}, journal = {Nature Genetics}, author = {Mukherjee, Sonali and Berger, Michael F. and Jona, Ghil and Wang, Xun S. and Muzzey, Dale and Snyder, Michael and Young, Richard A. and Bulyk, Martha L.}, month = dec, year = {2004}, pages = {1331--1339}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/W3HW5SCU/Mukherjee et al. - 2004 - Rapid analysis of the DNA-binding specificities of.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/66IUM3A8/ng1473.html:text/html} } @article{berger_compact_2006, title = {Compact, universal {DNA} microarrays to comprehensively determine transcription-factor binding site specificities}, volume = {24}, copyright = {© 2006 Nature Publishing Group}, issn = {1087-0156}, url = {http://www.nature.com/nbt/journal/v24/n11/full/nbt1246.html}, doi = {10.1038/nbt1246}, abstract = {Transcription factors (TFs) interact with specific DNA regulatory sequences to control gene expression throughout myriad cellular processes. However, the DNA binding specificities of only a small fraction of TFs are sufficiently characterized to predict the sequences that they can and cannot bind. We present a maximally compact, synthetic DNA sequence design for protein binding microarray (PBM) experiments that represents all possible DNA sequence variants of a given length k (that is, all 'k-mers') on a single, universal microarray. 
We constructed such all k-mer microarrays covering all 10–base pair (bp) binding sites by converting high-density single-stranded oligonucleotide arrays to double-stranded (ds) DNA arrays. Using these microarrays we comprehensively determined the binding specificities over a full range of affinities for five TFs of different structural classes from yeast, worm, mouse and human. The unbiased coverage of all k-mers permits high-throughput interrogation of binding site preferences, including nucleotide interdependencies, at unprecedented resolution.}, language = {en}, number = {11}, urldate = {2016-08-23}, journal = {Nature Biotechnology}, author = {Berger, Michael F. and Philippakis, Anthony A. and Qureshi, Aaron M. and He, Fangxue S. and Estep, Preston W. and Bulyk, Martha L.}, month = nov, year = {2006}, pages = {1429--1435}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/F5R2C9GV/Berger et al. - 2006 - Compact, universal DNA microarrays to comprehensiv.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/NV6A7NWG/nbt1246.html:text/html} } @article{kouzarides_chromatin_2007, title = {Chromatin {Modifications} and {Their} {Function}}, volume = {128}, issn = {0092-8674}, url = {http://www.sciencedirect.com/science/article/pii/S0092867407001845}, doi = {10.1016/j.cell.2007.02.005}, abstract = {The surface of nucleosomes is studded with a multiplicity of modifications. At least eight different classes have been characterized to date and many different sites have been identified for each class. Operationally, modifications function either by disrupting chromatin contacts or by affecting the recruitment of nonhistone proteins to chromatin. Their presence on histones can dictate the higher-order chromatin structure in which DNA is packaged and can orchestrate the ordered recruitment of enzyme complexes to manipulate DNA. 
In this way, histone modifications have the potential to influence many fundamental biological processes, some of which may be epigenetically inherited.}, number = {4}, urldate = {2016-08-25}, journal = {Cell}, author = {Kouzarides, Tony}, month = feb, year = {2007}, pages = {693--705}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5W94T3M9/Kouzarides - 2007 - Chromatin Modifications and Their Function.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SS4ZR8W3/S0092867407001845.html:text/html} } @article{fischle_regulation_2005, title = {Regulation of {HP}1–chromatin binding by histone {H}3 methylation and phosphorylation}, volume = {438}, copyright = {© 2005 Nature Publishing Group}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v438/n7071/full/nature04219.html}, doi = {10.1038/nature04219}, abstract = {Tri-methylation of histone H3 lysine 9 is important for recruiting heterochromatin protein 1 (HP1) to discrete regions of the genome, thereby regulating gene expression, chromatin packaging and heterochromatin formation. Here we show that HP1α, -β, and -γ are released from chromatin during the M phase of the cell cycle, even though tri-methylation levels of histone H3 lysine 9 remain unchanged. However, the additional, transient modification of histone H3 by phosphorylation of serine 10 next to the more stable methyl-lysine 9 mark is sufficient to eject HP1 proteins from their binding sites. Inhibition or depletion of the mitotic kinase Aurora B, which phosphorylates serine 10 on histone H3, causes retention of HP1 proteins on mitotic chromosomes, suggesting that H3 serine 10 phosphorylation is necessary for the dissociation of HP1 from chromatin in M phase.
These findings establish a regulatory mechanism of protein–protein interactions, through a combinatorial readout of two adjacent post-translational modifications: a stable methylation and a dynamic phosphorylation mark.}, language = {en}, number = {7071}, urldate = {2016-08-25}, journal = {Nature}, author = {Fischle, Wolfgang and Tseng, Boo Shan and Dormann, Holger L. and Ueberheide, Beatrix M. and Garcia, Benjamin A. and Shabanowitz, Jeffrey and Hunt, Donald F. and Funabiki, Hironori and Allis, C. David}, month = dec, year = {2005}, pages = {1116--1122}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5CFJUGEB/Fischle et al. - 2005 - Regulation of HP1–chromatin binding by histone H3 .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/B5G4MKGB/nature04219.html:text/html} } @article{stormo_use_1982, title = {Use of the ‘{Perceptron}’ algorithm to distinguish translational initiation sites in {E}. coli}, volume = {10}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/10/9/2997}, doi = {10.1093/nar/10.9.2997}, abstract = {We have used a “Perceptron” algorithm to find a weighting function which distinguishes E. coli translational initiation sites from all other sites in a library of over 78,000 nucleotides of mRNA sequence. The “Perceptron” examined sequences as linear representations. The “Perceptron” is more successful at finding gene beginnings than our previous searches using “rules” (see previous paper). We note that the weighting function can find translational initiation sites within sequences that were not included in the training set.}, language = {en}, number = {9}, urldate = {2016-08-25}, journal = {Nucleic Acids Research}, author = {Stormo, Gary D. and Schneider, Thomas D. 
and Gold, Larry and Ehrenfeucht, Andrzej}, month = may, year = {1982}, pmid = {7048259}, pages = {2997--3011}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IU3EEKKP/Stormo et al. - 1982 - Use of the ‘Perceptron’ algorithm to distinguish t.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/2V79XU79/2997.html:text/html} } @article{takahashi_induction_2006, title = {Induction of {Pluripotent} {Stem} {Cells} from {Mouse} {Embryonic} and {Adult} {Fibroblast} {Cultures} by {Defined} {Factors}}, volume = {126}, issn = {0092-8674, 1097-4172}, url = {http://dx.doi.org/10.1016/j.cell.2006.07.024}, doi = {10.1016/j.cell.2006.07.024}, abstract = {Differentiated cells can be reprogrammed to an embryonic-like state by transfer of nuclear contents into oocytes or by fusion with embryonic stem (ES) cells. Little is known about factors that induce this reprogramming. Here, we demonstrate induction of pluripotent stem cells from mouse embryonic or adult fibroblasts by introducing four factors, Oct3/4, Sox2, c-Myc, and Klf4, under ES cell culture conditions. Unexpectedly, Nanog was dispensable. These cells, which we designated iPS (induced pluripotent stem) cells, exhibit the morphology and growth properties of ES cells and express ES cell marker genes. Subcutaneous transplantation of iPS cells into nude mice resulted in tumors containing a variety of tissues from all three germ layers. Following injection into blastocysts, iPS cells contributed to mouse embryonic development.
These data demonstrate that pluripotent stem cells can be directly generated from fibroblast cultures by the addition of only a few defined factors.}, language = {English}, number = {4}, urldate = {2016-08-26}, journal = {Cell}, author = {Takahashi, Kazutoshi and Yamanaka, Shinya}, month = aug, year = {2006}, pmid = {16904174}, pages = {663--676}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/47IZ9WIV/Takahashi et Yamanaka - 2006 - Induction of Pluripotent Stem Cells from Mouse Emb.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/N9BCVH52/S0092-8674(06)00976-7.html:text/html} } @article{yu_induced_2007, title = {Induced {Pluripotent} {Stem} {Cell} {Lines} {Derived} from {Human} {Somatic} {Cells}}, volume = {318}, copyright = {American Association for the Advancement of Science}, issn = {0036-8075, 1095-9203}, url = {http://science.sciencemag.org/content/318/5858/1917}, doi = {10.1126/science.1151526}, abstract = {Somatic cell nuclear transfer allows trans-acting factors present in the mammalian oocyte to reprogram somatic cell nuclei to an undifferentiated state. We show that four factors (OCT4, SOX2, NANOG, and LIN28) are sufficient to reprogram human somatic cells to pluripotent stem cells that exhibit the essential characteristics of embryonic stem (ES) cells. These induced pluripotent human stem cells have normal karyotypes, express telomerase activity, express cell surface markers and genes that characterize human ES cells, and maintain the developmental potential to differentiate into advanced derivatives of all three primary germ layers. Such induced pluripotent human cell lines should be useful in the production of new disease models and in drug development, as well as for applications in transplantation medicine, once technical limitations (for example, mutation through viral integration) are eliminated.
Human fibroblasts transfected with four genes exhibit the properties of embryonic stem cells.}, language = {en}, number = {5858}, urldate = {2016-08-26}, journal = {Science}, author = {Yu, Junying and Vodyanik, Maxim A. and Smuga-Otto, Kim and Antosiewicz-Bourget, Jessica and Frane, Jennifer L. and Tian, Shulan and Nie, Jeff and Jonsdottir, Gudrun A. and Ruotti, Victor and Stewart, Ron and Slukvin, Igor I. and Thomson, James A.}, month = dec, year = {2007}, pmid = {18029452}, pages = {1917--1920}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QJZJ4TMW/Yu et al. - 2007 - Induced Pluripotent Stem Cell Lines Derived from H.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/N5KDWEPM/1917.html:text/html} } @article{darnell_transcription_2002, title = {Transcription factors as targets for cancer therapy}, volume = {2}, copyright = {© 2002 Nature Publishing Group}, issn = {1474-175X}, url = {http://www.nature.com/nrc/journal/v2/n10/full/nrc906.html}, doi = {10.1038/nrc906}, abstract = {A limited list of transcription factors are overactive in most human cancer cells, which makes them targets for the development of anticancer drugs. That they are the most direct and hopeful targets for treating cancer is proposed, and this is supported by the fact that there are many more human oncogenes in signalling pathways than there are oncogenic transcription factors.
But how could specific transcription-factor activity be inhibited?}, language = {en}, number = {10}, urldate = {2016-08-26}, journal = {Nature Reviews Cancer}, author = {Darnell, James E.}, month = oct, year = {2002}, pages = {740--749}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/G93Q5ETV/Darnell - 2002 - Transcription factors as targets for cancer therap.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/E7PDA294/nrc906.html:text/html} } @article{zaret_pioneer_2011, title = {Pioneer transcription factors: establishing competence for gene expression}, volume = {25}, issn = {0890-9369, 1549-5477}, shorttitle = {Pioneer transcription factors}, url = {http://genesdev.cshlp.org/content/25/21/2227}, doi = {10.1101/gad.176826.111}, abstract = {Transcription factors are adaptor molecules that detect regulatory sequences in the DNA and target the assembly of protein complexes that control gene expression. Yet much of the DNA in the eukaryotic cell is in nucleosomes and thereby occluded by histones, and can be further occluded by higher-order chromatin structures and repressor complexes. Indeed, genome-wide location analyses have revealed that, for all transcription factors tested, the vast majority of potential DNA-binding sites are unoccupied, demonstrating the inaccessibility of most of the nuclear DNA. This raises the question of how target sites at silent genes become bound de novo by transcription factors, thereby initiating regulatory events in chromatin. Binding cooperativity can be sufficient for many kinds of factors to simultaneously engage a target site in chromatin and activate gene expression. However, in cases in which the binding of a series of factors is sequential in time and thus not initially cooperative, special “pioneer transcription factors” can be the first to engage target sites in chromatin. 
Such initial binding can passively enhance transcription by reducing the number of additional factors that are needed to bind the DNA, culminating in activation. In addition, pioneer factor binding can actively open up the local chromatin and directly make it competent for other factors to bind. Passive and active roles for the pioneer factor FoxA occur in embryonic development, steroid hormone induction, and human cancers. Herein we review the field and describe how pioneer factors may enable cellular reprogramming.}, language = {en}, number = {21}, urldate = {2016-08-26}, journal = {Genes \& Development}, author = {Zaret, Kenneth S. and Carroll, Jason S.}, month = nov, year = {2011}, pmid = {22056668}, keywords = {cancer, development, pioneer factors, transcription, activation, competence, steroid hormone receptors}, pages = {2227--2241}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8UMN6NNI/Zaret et Carroll - 2011 - Pioneer transcription factors establishing compet.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/JXSETAKT/2227.html:text/html} } @article{zhao_inferring_2009, title = {Inferring {Binding} {Energies} from {Selected} {Binding} {Sites}}, volume = {5}, issn = {1553-7358}, url = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000590}, doi = {10.1371/journal.pcbi.1000590}, abstract = {Author Summary The DNA binding sites of transcription factors that control gene expression are often predicted based on a collection of known or selected binding sites. The most commonly used methods for inferring the binding site pattern, or sequence motif, assume that the sites are selected in proportion to their affinity for the transcription factor, ignoring the effect of the transcription factor concentration. 
We have developed a new maximum likelihood approach, in a program called BEEML, that directly takes into account the transcription factor concentration as well as non-specific contributions to the binding affinity, and we show in simulation studies that it gives a much more accurate model of the transcription factor binding sites than previous methods. We also develop a new method for extracting binding sites for a transcription factor from a random pool of DNA sequences, called high-throughput SELEX (HT-SELEX), and we show that after a single round of selection BEEML can obtain an accurate model of the transcription factor binding sites.}, number = {12}, urldate = {2016-08-26}, journal = {PLOS Computational Biology}, author = {Zhao, Yue and Granas, David and Stormo, Gary D.}, year = {2009}, keywords = {Transcription Factors, biophysics, Sequence motif analysis, DNA-binding proteins, DNA sequence analysis, Binding analysis, Sequence analysis, Free energy}, pages = {e1000590}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9MID4X6P/Zhao et al. - 2009 - Inferring Binding Energies from Selected Binding S.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/54ET4E87/article.html:text/html} } @article{ellrott_identifying_2002, title = {Identifying transcription factor binding sites through {Markov} chain optimization}, volume = {18}, issn = {1367-4803, 1460-2059}, url = {http://bioinformatics.oxfordjournals.org/content/18/suppl_2/S100}, doi = {10.1093/bioinformatics/18.suppl_2.S100}, abstract = {Even though every cell in an organism contains the same genetic material, each cell does not express the same cohort of genes. Therefore, one of the major problems facing genomic research today is to determine not only which genes are differentially expressed and under what conditions, but also how the expression of those genes is regulated.
The first step in determining differential gene expression is the binding of sequence-specific DNA binding proteins (i.e. transcription factors) to regulatory regions of the genes (i.e. promoters and enhancers). An important aspect to understanding how a given transcription factor functions is to know the entire gamut of binding sites and subsequently potential target genes that the factor may bind/regulate. In this study, we have developed a computer algorithm to scan genomic databases for transcription factor binding sites, based on a novel Markov chain optimization method, and used it to scan the human genome for sites that bind to hepatocyte nuclear factor 4 α (HNF4α). A list of 71 known HNF4α binding sites from the literature were used to train our Markov chain model. By looking at the window of 600 nucleotides around the transcription start site of each confirmed gene on the human genome, we identified 849 sites with varying binding potential and experimentally tested 109 of those sites for binding to HNF4α. Our results show that the program was very successful in identifying 77 new HNF4α binding sites with varying binding affinities (i.e. a 71\% success rate). Therefore, this computational method for searching genomic databases for potential transcription factor binding sites is a powerful tool for investigating mechanisms of differential gene regulation. Contact: jiang@cs.ucr.edu}, language = {en}, number = {suppl 2}, urldate = {2016-08-29}, journal = {Bioinformatics}, author = {Ellrott, Kyle and Yang, Chuhu and Sladek, Frances M. and Jiang, Tao}, month = oct, year = {2002}, pmid = {12385991}, pages = {S100--S109}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5226R9D9/Ellrott et al. 
- 2002 - Identifying transcription factor binding sites thr.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/U4937D39/S100.html:text/html} } @article{granek_explicit_2005, title = {Explicit equilibrium modeling of transcription-factor binding and gene regulation}, volume = {6}, issn = {1474-760X}, url = {http://dx.doi.org/10.1186/gb-2005-6-10-r87}, doi = {10.1186/gb-2005-6-10-r87}, abstract = {We have developed a computational model that predicts the probability of transcription factor binding to any site in the genome. GOMER (generalizable occupancy model of expression regulation) calculates binding probabilities on the basis of position weight matrices, and incorporates the effects of cooperativity and competition by explicit calculation of coupled binding equilibria. GOMER can be used to test hypotheses regarding gene regulation that build upon this physically principled prediction of protein-DNA binding.}, urldate = {2016-08-29}, journal = {Genome Biology}, author = {Granek, Joshua A. and Clarke, Neil D.}, year = {2005}, pages = {R87}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UNX9KDTM/Granek et Clarke - 2005 - Explicit equilibrium modeling of transcription-fac.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7H3ZRUJ6/gb-2005-6-10-r87.html:text/html} } @article{berg_selection_1988, title = {Selection of {DNA} binding sites by regulatory proteins}, volume = {200}, issn = {0022-2836}, url = {http://www.sciencedirect.com/science/article/pii/0022283688904822}, doi = {10.1016/0022-2836(88)90482-2}, abstract = {The statistics of base-pair usage within known recognition sites for a particular DNA-binding protein can be used to estimate the relative protein binding affinities to these sites, as well as to sites containing any other combinations of base-pairs. 
As has been described elsewhere, the connection between base-pair statistics and binding free energy is made by an equal probability selection assumption; i.e. that all base-pair sequences that provide appropriate binding strength are equally likely to have been chosen as recognition sites in the course of evolution. This is analogous to a statistical-mechanical system where all configurations with the same energy are equally likely to occur. In this communication, we apply the statistical-mechanical selection theory to analyze the base-pair statistics of the known recognition sequences for the cyclic AMP receptor protein (CRP). The theoretical predictions are found to be in reasonable agreement with binding data for those sequences for which experimental binding information is available, thus lending support to the basic assumptions of the selection theory. On the basis of this agreement, we can predict the affinity for CRP binding to any base-pair sequence, albeit with a large statistical uncertainty. When the known recognition sites for CRP are ranked according to predicted binding affinities, we find that the ranking is consistent with the hypothesis that the level of function of these sites parallels their fractional saturation with CRP-cAMP under in-vivo conditions. When applied to the entire genome, the theory predicts the existence of a large number of randomly occurring “pseudosites” with strong binding affinity for CRP. It appears that most CRP molecules are engaged in non-productive binding at non-specific or pseudospecific sites under in-vivo conditions. In this sense, the specificity of the CRP binding site is very low. Relative specificity requirements for polymerases, repressors and activators are compared in light of the results of this and the first paper in this series.}, number = {4}, urldate = {2016-08-29}, journal = {Journal of Molecular Biology}, author = {Berg, Otto G. 
and von Hippel, Peter H.}, month = apr, year = {1988}, pages = {709--723}, file = {1-s2.0-0968000488900850-main.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MN27QNMC/1-s2.0-0968000488900850-main.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MN27QNMC/0022283688904822.html:text/html} } @article{huang_optimized_2006, title = {Optimized mixed {Markov} models for motif identification}, volume = {7}, issn = {1471-2105}, url = {http://dx.doi.org/10.1186/1471-2105-7-279}, doi = {10.1186/1471-2105-7-279}, abstract = {Identifying functional elements, such as transcriptional factor binding sites, is a fundamental step in reconstructing gene regulatory networks and remains a challenging issue, largely due to limited availability of training samples.}, urldate = {2016-08-31}, journal = {BMC Bioinformatics}, author = {Huang, Weichun and Umbach, David M. and Ohler, Uwe and Li, Leping}, year = {2006}, pages = {279}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RQARQ6ZZ/Huang et al. - 2006 - Optimized mixed Markov models for motif identifica.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TF3D3CHE/1471-2105-7-279.html:text/html} } @article{maaskola_binding_2014, title = {Binding site discovery from nucleic acid sequences by discriminative learning of hidden {Markov} models}, volume = {42}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/42/21/12995}, doi = {10.1093/nar/gku1083}, abstract = {We present a discriminative learning method for pattern discovery of binding sites in nucleic acid sequences based on hidden Markov models. Sets of positive and negative example sequences are mined for sequence motifs whose occurrence frequency varies between the sets. The method offers several objective functions, but we concentrate on mutual information of condition and motif occurrence. 
We perform a systematic comparison of our method and numerous published motif-finding tools. Our method achieves the highest motif discovery performance, while being faster than most published methods. We present case studies of data from various technologies, including ChIP-Seq, RIP-Chip and PAR-CLIP, of embryonic stem cell transcription factors and of RNA-binding proteins, demonstrating practicality and utility of the method. For the alternative splicing factor RBM10, our analysis finds motifs known to be splicing-relevant. The motif discovery method is implemented in the free software package Discrover. It is applicable to genome- and transcriptome-scale data, makes use of available repeat experiments and aside from binary contrasts also more complex data configurations can be utilized.}, language = {en}, number = {21}, urldate = {2016-08-31}, journal = {Nucleic Acids Research}, author = {Maaskola, Jonas and Rajewsky, Nikolaus}, month = dec, year = {2014}, pmid = {25389269}, pages = {12995--13011}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/M2JZH46C/Maaskola et Rajewsky - 2014 - Binding site discovery from nucleic acid sequences.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XRV4RKGA/12995.html:text/html} } @article{zhao_improved_2012, title = {Improved {Models} for {Transcription} {Factor} {Binding} {Site} {Identification} {Using} {Nonindependent} {Interactions}}, volume = {191}, copyright = {Copyright © 2012 by the Genetics Society of America}, issn = {0016-6731, 1943-2631}, url = {http://www.genetics.org/content/191/3/781}, doi = {10.1534/genetics.112.138685}, abstract = {Identifying transcription factor (TF) binding sites is essential for understanding regulatory networks. The specificity of most TFs is currently modeled using position weight matrices (PWMs) that assume the positions within a binding site contribute independently to binding affinity for any site. 
Extensive, high-throughput quantitative binding assays let us examine, for the first time, the independence assumption for many TFs. We find that the specificity of most TFs is well fit with the simple PWM model, but in some cases more complex models are required. We introduce a binding energy model (BEM) that can include energy parameters for nonindependent contributions to binding affinity. We show that in most cases where a PWM is not sufficient, a BEM that includes energy parameters for adjacent dinucleotide contributions models the specificity very well. Having more accurate models of specificity greatly improves the interpretation of in vivo TF localization data, such as from chromatin immunoprecipitation followed by sequencing (ChIP-seq) experiments.}, language = {en}, number = {3}, urldate = {2016-08-31}, journal = {Genetics}, author = {Zhao, Yue and Ruan, Shuxiang and Pandey, Manishi and Stormo, Gary D.}, month = jul, year = {2012}, pmid = {22505627}, pages = {781--790}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IV477UNE/Zhao et al. - 2012 - Improved Models for Transcription Factor Binding S.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/PZZ2SF6P/781.html:text/html} } @article{kulakovskiy_binding_2013, title = {From binding motifs in {ChIP}-{Seq} data to improved models of transcription factor binding sites}, volume = {11}, issn = {0219-7200}, url = {http://www.worldscientific.com/doi/abs/10.1142/S0219720013400040}, doi = {10.1142/S0219720013400040}, abstract = {Chromatin immunoprecipitation followed by deep sequencing (ChIP-Seq) became a method of choice to locate DNA segments bound by different regulatory proteins. ChIP-Seq produces extremely valuable information to study transcriptional regulation.
The wet-lab workflow is often supported by downstream computational analysis including construction of models of nucleotide sequences of transcription factor binding sites in DNA, which can be used to detect binding sites in ChIP-Seq data at a single base pair resolution. The most popular TFBS model is represented by positional weight matrix (PWM) with statistically independent positional weights of nucleotides in different columns; such PWMs are constructed from a gapless multiple local alignment of sequences containing experimentally identified TFBSs. Modern high-throughput techniques, including ChIP-Seq, provide enough data for careful training of advanced models containing more parameters than PWM. Yet, many suggested multiparametric models often provide only incremental improvement of TFBS recognition quality comparing to traditional PWMs trained on ChIP-Seq data. We present a novel computational tool, diChIPMunk, that constructs TFBS models as optimal dinucleotide PWMs, thus accounting for correlations between nucleotides neighboring in input sequences. diChIPMunk utilizes many advantages of ChIPMunk, its ancestor algorithm, accounting for ChIP-Seq base coverage profiles ("peak shape") and using the effective subsampling-based core procedure which allows processing of large datasets. We demonstrate that diPWMs constructed by diChIPMunk outperform traditional PWMs constructed by ChIPMunk from the same ChIP-Seq data. Software website: http://autosome.ru/dichipmunk/}, number = {01}, urldate = {2016-08-31}, journal = {Journal of Bioinformatics and Computational Biology}, author = {Kulakovskiy, Ivan and Levitsky, Victor and Oshchepkov, Dmitry and Bryzgalov, Leonid and Vorontsov, Ilya and Makeev, Vsevolod}, month = jan, year = {2013}, pages = {1340004}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UQPRI686/Kulakovskiy et al. 
- 2013 - From binding motifs in chip-seq data to improved m.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IN3VXZCU/S0219720013400040.html:text/html} } @article{mathelier_jaspar_2014, title = {{JASPAR} 2014: an extensively expanded and updated open-access database of transcription factor binding profiles}, volume = {42}, issn = {0305-1048, 1362-4962}, shorttitle = {{JASPAR} 2014}, url = {http://nar.oxfordjournals.org/content/42/D1/D142}, doi = {10.1093/nar/gkt997}, abstract = {JASPAR (http://jaspar.genereg.net) is the largest open-access database of matrix-based nucleotide profiles describing the binding preference of transcription factors from multiple species. The fifth major release greatly expands the heart of JASPAR—the JASPAR CORE subcollection, which contains curated, non-redundant profiles—with 135 new curated profiles (74 in vertebrates, 8 in Drosophila melanogaster, 10 in Caenorhabditis elegans and 43 in Arabidopsis thaliana; a 30\% increase in total) and 43 older updated profiles (36 in vertebrates, 3 in D. melanogaster and 4 in A. thaliana; a 9\% update in total). The new and updated profiles are mainly derived from published chromatin immunoprecipitation-seq experimental datasets. In addition, the web interface has been enhanced with advanced capabilities in browsing, searching and subsetting. Finally, the new JASPAR release is accompanied by a new BioPython package, a new R tool package and a new R/Bioconductor data package to facilitate access for both manual and automated methods.}, language = {en}, number = {D1}, urldate = {2016-08-31}, journal = {Nucleic Acids Research}, author = {Mathelier, Anthony and Zhao, Xiaobei and Zhang, Allen W. and Parcy, François and Worsley-Hunt, Rebecca and Arenillas, David J. 
and Buchman, Sorana and Chen, Chih-yu and Chou, Alice and Ienasescu, Hans and Lim, Jonathan and Shyr, Casper and Tan, Ge and Zhou, Michelle and Lenhard, Boris and Sandelin, Albin and Wasserman, Wyeth W.}, month = jan, year = {2014}, pmid = {24194598}, pages = {D142--D147}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/AFFH4HPG/Mathelier et al. - 2014 - JASPAR 2014 an extensively expanded and updated o.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/23IJ7N8C/D142.html:text/html} } @article{man_non-independence_2001, title = {Non-independence of {Mnt} repressor–operator interaction determined by a new quantitative multiple fluorescence relative affinity ({QuMFRA}) assay}, volume = {29}, issn = {0305-1048}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC55749/}, abstract = {Salmonella bacteriophage repressor Mnt belongs to the ribbon–helix–helix class of transcription factors. Previous SELEX results suggested that interactions of Mnt with positions 16 and 17 of the operator DNA are not independent. Using a newly developed high-throughput quantitative multiple fluorescence relative affinity (QuMFRA) assay, we directly quantified the relative equilibrium binding constants (Kref) of Mnt to operators carrying all the possible dinucleotide combinations at these two positions. Results show that Mnt prefers binding to C, instead of wild-type A, at position 16 when wild-type C at position 17 is changed to other bases. The measured Kref values of double mutants were also higher than the values predicted from single mutants, demonstrating the non-independence of these two positions. 
The ability to produce a large number of quantitative binding data simultaneously and the potential to scale up makes QuMFRA a valuable tool for the large-scale study of macromolecular interaction.}, number = {12}, urldate = {2016-09-02}, journal = {Nucleic Acids Research}, author = {Man, Tsz-Kwong and Stormo, Gary D.}, month = jun, year = {2001}, pmid = {11410653}, pmcid = {PMC55749}, pages = {2471--2478}, file = {PubMed Central Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/JDFJKSJ4/Man et Stormo - 2001 - Non-independence of Mnt repressor–operator interac.pdf:application/pdf} } @article{ibrahim_jamm:_2015, title = {{JAMM}: a peak finder for joint analysis of {NGS} replicates}, volume = {31}, issn = {1367-4803, 1460-2059}, shorttitle = {{JAMM}}, url = {http://bioinformatics.oxfordjournals.org/content/31/1/48}, doi = {10.1093/bioinformatics/btu568}, abstract = {Motivation: Although peak finding in next-generation sequencing (NGS) datasets has been addressed extensively, there is no consensus on how to analyze and process biological replicates. Furthermore, most peak finders do not focus on accurate determination of enrichment site widths and are not widely applicable to different types of datasets. Results: We developed JAMM (Joint Analysis of NGS replicates via Mixture Model clustering): a peak finder that can integrate information from biological replicates, determine enrichment site widths accurately and resolve neighboring narrow peaks. JAMM is a universal peak finder that is applicable to different types of datasets. We show that JAMM is among the best performing peak finders in terms of site detection accuracy and in terms of accurate determination of enrichment sites widths. In addition, JAMM’s replicate integration improves peak spatial resolution, sorting and peak finding accuracy. 
Availability and implementation: JAMM is available for free and can run on Linux machines through the command line: http://code.google.com/p/jamm-peak-finder Contact: mahmoud.ibrahim@mdc-berlin.de or uwe.ohler@mdc-berlin.de. Supplementary information: Supplementary data are available at Bioinformatics online.}, language = {en}, number = {1}, urldate = {2016-10-17}, journal = {Bioinformatics}, author = {Ibrahim, Mahmoud M. and Lacadie, Scott A. and Ohler, Uwe}, month = jan, year = {2015}, pmid = {25223640}, pages = {48--55}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IMV7KPV6/Ibrahim et al. - 2015 - JAMM a peak finder for joint analysis of NGS repl.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/VJKHWJP7/48.html:text/html} } @article{munteanu_cougerco-factors_2014, title = {{COUGER}—co-factors associated with uniquely-bound genomic regions}, volume = {42}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/42/W1/W461}, doi = {10.1093/nar/gku435}, abstract = {Most transcription factors (TFs) belong to protein families that share a common DNA binding domain and have very similar DNA binding preferences. However, many paralogous TFs (i.e. members of the same TF family) perform different regulatory functions and interact with different genomic regions in the cell. A potential mechanism for achieving this differential in vivo specificity is through interactions with protein co-factors. Computational tools for studying the genomic binding profiles of paralogous TFs and identifying their putative co-factors are currently lacking. Here, we present an interactive web implementation of COUGER, a classification-based framework for identifying protein co-factors that might provide specificity to paralogous TFs. 
COUGER takes as input two sets of genomic regions bound by paralogous TFs, and it identifies a small set of putative co-factors that best distinguish the two sets of sequences. To achieve this task, COUGER uses a classification approach, with features that reflect the DNA-binding specificities of the putative co-factors. The identified co-factors are presented in a user-friendly output page, together with information that allows the user to understand and to explore the contributions of individual co-factor features. COUGER can be run as a stand-alone tool or through a web interface: http://couger.oit.duke.edu.}, language = {en}, number = {W1}, urldate = {2016-10-17}, journal = {Nucleic Acids Research}, author = {Munteanu, Alina and Ohler, Uwe and Gordân, Raluca}, month = jul, year = {2014}, pmid = {24861628}, pages = {W461--W467}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GHRB6VBH/Munteanu et al. - 2014 - COUGER—co-factors associated with uniquely-bound g.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9A38J3KA/W461.html:text/html} } @article{perner_inference_2014-1, title = {Inference of interactions between chromatin modifiers and histone modifications: from {ChIP}-{Seq} data to chromatin-signaling}, volume = {42}, issn = {0305-1048, 1362-4962}, shorttitle = {Inference of interactions between chromatin modifiers and histone modifications}, url = {http://nar.oxfordjournals.org/content/42/22/13689}, doi = {10.1093/nar/gku1234}, abstract = {Chromatin modifiers and histone modifications are components of a chromatin-signaling network involved in transcription and its regulation. The interactions between chromatin modifiers and histone modifications are often unknown, are based on the analysis of few genes or are studied in vitro. Here, we apply computational methods to recover interactions between chromatin modifiers and histone modifications from genome-wide ChIP-Seq data. 
These interactions provide a high-confidence backbone of the chromatin-signaling network. Many recovered interactions have literature support; others provide hypotheses about yet unknown interactions. We experimentally verified two of these predicted interactions, leading to a link between H4K20me1 and members of the Polycomb Repressive Complexes 1 and 2. Our results suggest that our computationally derived interactions are likely to lead to novel biological insights required to establish the connectivity of the chromatin-signaling network involved in transcription and its regulation.}, language = {en}, number = {22}, urldate = {2016-10-17}, journal = {Nucleic Acids Research}, author = {Perner, Juliane and Lasserre, Julia and Kinkley, Sarah and Vingron, Martin and Chung, Ho-Ryun}, month = dec, year = {2014}, pmid = {25414326}, pages = {13689--13695}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/22APC2DQ/Perner et al. - 2014 - Inference of interactions between chromatin modifi.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5AKI5GG2/13689.html:text/html} } @article{mendoza-parra_reconstructed_2016, title = {Reconstructed cell fate-regulatory programs in stem cells reveal hierarchies and key factors of neurogenesis}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/early/2016/09/20/gr.208926.116}, doi = {10.1101/gr.208926.116}, abstract = {An international, peer-reviewed genome sciences journal featuring outstanding original research that offers novel insights into the biology of all organisms}, language = {en}, urldate = {2016-10-17}, journal = {Genome Research}, author = {Mendoza-Parra, Marco-Antonio and Malysheva, Valeriya and Saleem, Mohamed Ashick Mohamed and Lieb, Michele and Godel, Aurelie and Gronemeyer, Hinrich}, month = sep, year = {2016}, pmid = {27650846}, pages = {gr.208926.116}, file = {Full Text 
PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FF6R7IRA/Mendoza-Parra et al. - 2016 - Reconstructed cell fate-regulatory programs in ste.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6G67CP2U/gr.208926.116.html:text/html} } @article{boeva_analysis_2016, title = {Analysis of {Genomic} {Sequence} {Motifs} for {Deciphering} {Transcription} {Factor} {Binding} and {Transcriptional} {Regulation} in {Eukaryotic} {Cells}}, url = {http://journal.frontiersin.org/article/10.3389/fgene.2016.00024/full}, doi = {10.3389/fgene.2016.00024}, abstract = {Eukaryotic genomes contain a variety of structured patterns: repetitive elements, binding sites of DNA and RNA associated proteins, splice sites, and so on. Often, these structured patterns can be formalized as motifs and described using a proper mathematical model such as position weight matrix and IUPAC consensus. Two key tasks are typically carried out for motifs in the context of the analysis of genomic sequences. These are: identification in a set of DNA regions of over-represented motifs from a particular motif database, and de novo discovery of over-represented motifs. Here we describe existing methodology to perform these two tasks for motifs characterizing transcription factor binding. When applied to the output of ChIP-seq and ChIP-exo experiments, or to promoter regions of co-modulated genes, motif analysis techniques allow for the prediction of transcription factor binding events and enable identification of transcriptional regulators and co-regulators. 
The usefulness of motif analysis is further exemplified in this review by how motif discovery improves peak calling in ChIP-seq and ChIP-exo experiments and, when coupled with information on gene expression, allows insights into physical mechanisms of transcriptional modulation.}, urldate = {2016-11-09}, journal = {Frontiers in Genetics}, volume = {7}, author = {Boeva, Valentina}, year = {2016}, keywords = {Transcription Factors, Binding Sites, motif discovery, position-specific scoring matrices, regulation of gene transcription, ChIP-seq, binding motif models}, pages = {24}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KDGW3PE5/Boeva - 2016 - Analysis of Genomic Sequence Motifs for Decipherin.pdf:application/pdf} } @article{kulakovskiy_deeper_2011, title = {A deeper look into transcription regulatory code by preferred pair distance templates for transcription factor binding sites}, volume = {27}, issn = {1367-4803, 1460-2059}, url = {http://bioinformatics.oxfordjournals.org/content/27/19/2621}, doi = {10.1093/bioinformatics/btr453}, abstract = {Motivation: Modern experimental methods provide substantial information on protein–DNA recognition. Studying arrangements of transcription factor binding sites (TFBSs) of interacting transcription factors (TFs) advances understanding of the transcription regulatory code. Results: We constructed binding motifs for TFs forming a complex with HIF-1α at the erythropoietin 3′-enhancer. Corresponding TFBSs were predicted in the segments around transcription start sites (TSSs) of all human genes. Using the genome-wide set of regulatory regions, we observed several strongly preferred distances between hypoxia-responsive element (HRE) and binding sites of a particular cofactor protein. The set of preferred distances was called as a preferred pair distance template (PPDT). PPDT dramatically depended on the TF and orientation of its binding sites relative to HRE.
PPDT evaluated from the genome-wide set of regulatory sequences was used to detect significant PPDT-consistent binding site pairs in regulatory regions of hypoxia-responsive genes. We believe PPDT can help to reveal the layout of eukaryotic regulatory segments. Contact: ivan.kulakovskiy@gmail.com Supplementary information: Supplementary data are available at Bioinformatics online.}, language = {en}, number = {19}, urldate = {2016-11-25}, journal = {Bioinformatics}, author = {Kulakovskiy, I. V. and Belostotsky, A. A. and Kasianov, A. S. and Esipova, N. G. and Medvedeva, Y. A. and Eliseeva, I. A. and Makeev, V. J.}, month = oct, year = {2011}, pmid = {21852305}, pages = {2621--2624}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/VPI9SK28/Kulakovskiy et al. - 2011 - A deeper look into transcription regulatory code b.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TSHP7AW7/2621.html:text/html} } @article{kulakovskiy_hocomoco:_2013, title = {{HOCOMOCO}: a comprehensive collection of human transcription factor binding sites models}, volume = {41}, issn = {0305-1048, 1362-4962}, shorttitle = {{HOCOMOCO}}, url = {http://nar.oxfordjournals.org/content/41/D1/D195}, doi = {10.1093/nar/gks1089}, abstract = {Transcription factor (TF) binding site (TFBS) models are crucial for computational reconstruction of transcription regulatory networks. In existing repositories, a TF often has several models (also called binding profiles or motifs), obtained from different experimental data. Having a single TFBS model for a TF is more pragmatic for practical applications. We show that integration of TFBS data from various types of experiments into a single model typically results in the improved model quality probably due to partial correction of source specific technique bias. 
We present the Homo sapiens comprehensive model collection (HOCOMOCO, http://autosome.ru/HOCOMOCO/, http://cbrc.kaust.edu.sa/hocomoco/) containing carefully hand-curated TFBS models constructed by integration of binding sequences obtained by both low- and high-throughput methods. To construct position weight matrices to represent these TFBS models, we used ChIPMunk software in four computational modes, including newly developed periodic positional prior mode associated with DNA helix pitch. We selected only one TFBS model per TF, unless there was a clear experimental evidence for two rather distinct TFBS models. We assigned a quality rating to each model. HOCOMOCO contains 426 systematically curated TFBS models for 401 human TFs, where 172 models are based on more than one data source.}, language = {en}, number = {D1}, urldate = {2016-11-25}, journal = {Nucleic Acids Research}, author = {Kulakovskiy, Ivan V. and Medvedeva, Yulia A. and Schaefer, Ulf and Kasianov, Artem S. and Vorontsov, Ilya E. and Bajic, Vladimir B. and Makeev, Vsevolod J.}, month = jan, year = {2013}, pmid = {23175603}, pages = {D195--D202}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4ZXU8QGZ/Kulakovskiy et al. - 2013 - HOCOMOCO a comprehensive collection of human tran.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UI3C8F8N/D195.html:text/html} } @article{kulakovskiy_hocomoco:_2016, title = {{HOCOMOCO}: expansion and enhancement of the collection of transcription factor binding sites models}, volume = {44}, issn = {0305-1048, 1362-4962}, shorttitle = {{HOCOMOCO}}, url = {http://nar.oxfordjournals.org/content/44/D1/D116}, doi = {10.1093/nar/gkv1249}, abstract = {Models of transcription factor (TF) binding sites provide a basis for a wide spectrum of studies in regulatory genomics, from reconstruction of regulatory networks to functional annotation of transcripts and sequence variants. 
While TFs may recognize different sequence patterns in different conditions, it is pragmatic to have a single generic model for each particular TF as a baseline for practical applications. Here we present the expanded and enhanced version of HOCOMOCO (http://hocomoco.autosome.ru and http://www.cbrc.kaust.edu.sa/hocomoco10), the collection of models of DNA patterns, recognized by transcription factors. HOCOMOCO now provides position weight matrix (PWM) models for binding sites of 601 human TFs and, in addition, PWMs for 396 mouse TFs. Furthermore, we introduce the largest up to date collection of dinucleotide PWM models for 86 (52) human (mouse) TFs. The update is based on the analysis of massive ChIP-Seq and HT-SELEX datasets, with the validation of the resulting models on in vivo data. To facilitate a practical application, all HOCOMOCO models are linked to gene and protein databases (Entrez Gene, HGNC, UniProt) and accompanied by precomputed score thresholds. Finally, we provide command-line tools for PWM and diPWM threshold estimation and motif finding in nucleotide sequences.}, language = {en}, number = {D1}, urldate = {2016-11-25}, journal = {Nucleic Acids Research}, author = {Kulakovskiy, Ivan V. and Vorontsov, Ilya E. and Yevshin, Ivan S. and Soboleva, Anastasiia V. and Kasianov, Artem S. and Ashoor, Haitham and Ba-alawi, Wail and Bajic, Vladimir B. and Medvedeva, Yulia A. and Kolpakov, Fedor A. and Makeev, Vsevolod J.}, month = jan, year = {2016}, pmid = {26586801}, pages = {D116--D125}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/2W77ZX4K/Kulakovskiy et al. 
- 2016 - HOCOMOCO expansion and enhancement of the collect.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MZ3QRPWE/D116.html:text/html} } @article{kulakovskiy_deep_2010, title = {Deep and wide digging for binding motifs in {ChIP}-{Seq} data}, volume = {26}, issn = {1367-4803, 1460-2059}, url = {http://bioinformatics.oxfordjournals.org/content/26/20/2622}, doi = {10.1093/bioinformatics/btq488}, abstract = {Summary: ChIP-Seq data are a new challenge for motif discovery. Such a data typically consists of thousands of DNA segments with base-specific coverage values. We present a new version of our DNA motif discovery software ChIPMunk adapted for ChIP-Seq data. ChIPMunk is an iterative algorithm that combines greedy optimization with bootstrapping and uses coverage profiles as motif positional preferences. ChIPMunk does not require truncation of long DNA segments and it is practical for processing up to tens of thousands of data sequences. Comparison with traditional (MEME) or ChIP-Seq-oriented (HMS) motif discovery tools shows that ChIPMunk identifies the correct motifs with the same or better quality but works dramatically faster. Availability and implementation: ChIPMunk is freely available within the ru\_genetika Java package: http://line.imb.ac.ru/ChIPMunk. Web-based version is also available. Contact: ivan.kulakovskiy@gmail.com Supplementary information: Supplementary data are available at Bioinformatics online.}, language = {en}, number = {20}, urldate = {2016-11-25}, journal = {Bioinformatics}, author = {Kulakovskiy, I. V. and Boeva, V. A. and Favorov, A. V. and Makeev, V. J.}, month = oct, year = {2010}, pmid = {20736340}, pages = {2622--2623}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4IXGI69U/Kulakovskiy et al. 
- 2010 - Deep and wide digging for binding motifs in ChIP-S.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FPC6UKKW/2622.html:text/html} } @article{boeva_exact_2007, title = {Exact p-value calculation for heterotypic clusters of regulatory motifs and its application in computational annotation of cis-regulatory modules}, volume = {2}, issn = {1748-7188}, url = {http://dx.doi.org/10.1186/1748-7188-2-13}, doi = {10.1186/1748-7188-2-13}, abstract = {cis-Regulatory modules (CRMs) of eukaryotic genes often contain multiple binding sites for transcription factors. The phenomenon that binding sites form clusters in CRMs is exploited in many algorithms to locate CRMs in a genome. This gives rise to the problem of calculating the statistical significance of the event that multiple sites, recognized by different factors, would be found simultaneously in a text of a fixed length. The main difficulty comes from overlapping occurrences of motifs. So far, no tools have been developed allowing the computation of p-values for simultaneous occurrences of different motifs which can overlap.}, urldate = {2016-11-30}, journal = {Algorithms for Molecular Biology}, author = {Boeva, Valentina and Clément, Julien and Régnier, Mireille and Roytberg, Mikhail A. and Makeev, Vsevolod J.}, year = {2007}, pages = {13}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/JGZCT7TD/Boeva et al. 
- 2007 - Exact p-value calculation for heterotypic clusters.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8ZZ9BTCW/1748-7188-2-13.html:text/html} } @article{guo_high_2012, title = {High {Resolution} {Genome} {Wide} {Binding} {Event} {Finding} and {Motif} {Discovery} {Reveals} {Transcription} {Factor} {Spatial} {Binding} {Constraints}}, volume = {8}, issn = {1553-7358}, url = {http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1002638}, doi = {10.1371/journal.pcbi.1002638}, abstract = {Author Summary The letters in our genome spell words and phrases that control when each gene is activated. To understand how these words and phrases function in health and disease, we have developed a new computational method to determine what word positions in our genomic text are used by each genome regulatory protein, and how these active words are spaced relative to one another. Our method achieves exceptional spatial accuracy by integrating experimental data with the text of our genome to find the precise words that are regulated by each protein factor. Using this analysis we have discovered novel word spacings in the experimental data that suggest novel genome grammatical control constructs.}, number = {8}, urldate = {2016-11-30}, journal = {PLOS Computational Biology}, author = {Guo, Yuchun and Mahony, Shaun and Gifford, David K.}, month = aug, year = {2012}, keywords = {Transcription Factors, Sequence motif analysis, Sequence alignment, Mammalian genomics, Genomic databases, Binding analysis, Cell binding assay, Cell binding}, pages = {e1002638}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QQ456JAQ/Guo et al. 
- 2012 - High Resolution Genome Wide Binding Event Finding .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/W83PTGJE/article.html:text/html} } @article{starick_chip-exo_2015, title = {{ChIP}-exo signal associated with {DNA}-binding motifs provides insight into the genomic binding of the glucocorticoid receptor and cooperating transcription factors}, volume = {25}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/25/6/825}, doi = {10.1101/gr.185157.114}, abstract = {An international, peer-reviewed genome sciences journal featuring outstanding original research that offers novel insights into the biology of all organisms}, language = {en}, number = {6}, urldate = {2016-11-30}, journal = {Genome Research}, author = {Starick, Stephan R. and Ibn-Salem, Jonas and Jurk, Marcel and Hernandez, Céline and Love, Michael I. and Chung, Ho-Ryun and Vingron, Martin and Thomas-Chollier, Morgane and Meijsing, Sebastiaan H.}, month = jun, year = {2015}, pmid = {25720775}, pages = {825--835}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/DBUJKDFD/Starick et al. - 2015 - ChIP-exo signal associated with DNA-binding motifs.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/89CTWNPR/825.full.html:text/html} } @article{jonkers_getting_2015, title = {Getting up to speed with transcription elongation by {RNA} polymerase {II}}, volume = {16}, copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. 
All Rights Reserved.}, issn = {1471-0072}, url = {http://www.nature.com/nrm/journal/v16/n3/full/nrm3953.html}, doi = {10.1038/nrm3953}, abstract = {Recent advances in sequencing techniques that measure nascent transcripts and that reveal the positioning of RNA polymerase II (Pol II) have shown that the pausing of Pol II in promoter-proximal regions and its release to initiate a phase of productive elongation are key steps in transcription regulation. Moreover, after the release of Pol II from the promoter-proximal region, elongation rates are highly dynamic throughout the transcription of a gene, and vary on a gene-by-gene basis. Interestingly, Pol II elongation rates affect co-transcriptional processes such as splicing, termination and genome stability. Increasing numbers of factors and regulatory mechanisms have been associated with the steps of transcription elongation by Pol II, revealing that elongation is a highly complex process. Elongation is thus now recognized as a key phase in the regulation of transcription by Pol II.}, language = {en}, number = {3}, urldate = {2016-12-16}, journal = {Nature Reviews Molecular Cell Biology}, author = {Jonkers, Iris and Lis, John T.}, month = mar, year = {2015}, keywords = {transcriptomics, transcription}, pages = {167--177}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/D6XBEGWR/Jonkers and Lis - 2015 - Getting up to speed with transcription elongation .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UCEUN62Q/nrm3953.html:text/html} } @article{heinz_selection_2015, title = {The selection and function of cell type-specific enhancers}, volume = {16}, copyright = {© 2015 Nature Publishing Group, a division of Macmillan Publishers Limited. 
All Rights Reserved.}, issn = {1471-0072}, url = {http://www.nature.com/nrm/journal/v16/n3/full/nrm3949.html}, doi = {10.1038/nrm3949}, abstract = {The human body contains several hundred cell types, all of which share the same genome. In metazoans, much of the regulatory code that drives cell type-specific gene expression is located in distal elements called enhancers. Although mammalian genomes contain millions of potential enhancers, only a small subset of them is active in a given cell type. Cell type-specific enhancer selection involves the binding of lineage-determining transcription factors that prime enhancers. Signal-dependent transcription factors bind to primed enhancers, which enables these broadly expressed factors to regulate gene expression in a cell type-specific manner. The expression of genes that specify cell type identity and function is associated with densely spaced clusters of active enhancers known as super-enhancers. The functions of enhancers and super-enhancers are influenced by, and affect, higher-order genomic organization.}, language = {en}, number = {3}, urldate = {2016-12-16}, journal = {Nature Reviews Molecular Cell Biology}, author = {Heinz, Sven and Romanoski, Casey E. and Benner, Christopher and Glass, Christopher K.}, month = mar, year = {2015}, keywords = {Transcriptional regulatory elements, Transcription Factors, transcription}, pages = {144--154}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/2XKV3K9Z/Heinz et al. 
- 2015 - The selection and function of cell type-specific e.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/PB9S77UU/nrm3949.html:text/html} } @article{mavrich_barrier_2008, title = {A barrier nucleosome model for statistical positioning of nucleosomes throughout the yeast genome}, volume = {18}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/18/7/1073}, doi = {10.1101/gr.078261.108}, abstract = {Most nucleosomes are well-organized at the 5′ ends of S. cerevisiae genes where “−1” and “+1” nucleosomes bracket a nucleosome-free promoter region (NFR). How nucleosomal organization is specified by the genome is less clear. Here we establish and inter-relate rules governing genomic nucleosome organization by sequencing DNA from more than one million immunopurified S. cerevisiae nucleosomes (displayed at http://atlas.bx.psu.edu/). Evidence is presented that the organization of nucleosomes throughout genes is largely a consequence of statistical packing principles. The genomic sequence specifies the location of the −1 and +1 nucleosomes. The +1 nucleosome forms a barrier against which nucleosomes are packed, resulting in uniform positioning, which decays at farther distances from the barrier. We present evidence for a novel 3′ NFR that is present at {\textgreater}95\% of all genes. 3′ NFRs may be important for transcription termination and anti-sense initiation. We present a high-resolution genome-wide map of TFIIB locations that implicates 3′ NFRs in gene looping.}, language = {en}, number = {7}, urldate = {2016-12-21}, journal = {Genome Research}, author = {Mavrich, Travis N. and Ioshikhes, Ilya P. and Venters, Bryan J. and Jiang, Cizhong and Tomsho, Lynn P. and Qi, Ji and Schuster, Stephan C. and Albert, Istvan and Pugh, B. Franklin}, month = jul, year = {2008}, pmid = {18550805}, pages = {1073--1083}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Z3E3FSWP/Mavrich et al. 
- 2008 - A barrier nucleosome model for statistical positio.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9HHGETWS/1073.html:text/html} } @article{wasson_ensemble_2009, title = {An ensemble model of competitive multi-factor binding of the genome}, volume = {19}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/19/11/2101}, doi = {10.1101/gr.093450.109}, abstract = {Hundreds of different factors adorn the eukaryotic genome, binding to it in large number. These DNA binding factors (DBFs) include nucleosomes, transcription factors (TFs), and other proteins and protein complexes, such as the origin recognition complex (ORC). DBFs compete with one another for binding along the genome, yet many current models of genome binding do not consider different types of DBFs together simultaneously. Additionally, binding is a stochastic process that results in a continuum of binding probabilities at any position along the genome, but many current models tend to consider positions as being either binding sites or not. Here, we present a model that allows a multitude of DBFs, each at different concentrations, to compete with one another for binding sites along the genome. The result is an “occupancy profile,” a probabilistic description of the DNA occupancy of each factor at each position. We implement our model efficiently as the software package COMPETE. We demonstrate genome-wide and at specific loci how modeling nucleosome binding alters TF binding, and vice versa, and illustrate how factor concentration influences binding occupancy. Binding cooperativity between nearby TFs arises implicitly via mutual competition with nucleosomes. Our method applies not only to TFs, but also recapitulates known occupancy profiles of a well-studied replication origin with and without ORC binding. Importantly, the sequence preferences our model takes as input are derived from in vitro experiments. 
This ensures that the calculated occupancy profiles are the result of the forces of competition represented explicitly in our model and the inherent sequence affinities of the constituent DBFs.}, language = {en}, number = {11}, urldate = {2016-12-21}, journal = {Genome Research}, author = {Wasson, Todd and Hartemink, Alexander J.}, month = nov, year = {2009}, pmid = {19720867}, pages = {2101--2112}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/T8QQ3CRU/Wasson and Hartemink - 2009 - An ensemble model of competitive multi-factor bind.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/B2RHGVV5/2101.html:text/html} } @incollection{mcginty_robert_k._and_tan_song_fundamentals_2014, address = {New York}, edition = {2014}, title = {Histones, {Nucleosomes}, and {Chromatin} {Structure}}, booktitle = {Fundamentals of {Chromatin}}, chapter = {1}, isbn = {978-1-4614-8623-7}, url = {http://www.springer.com/biomed/human+genetics/book/978-1-4614-8623-7}, abstract = {Chromatin is the combination of DNA and proteins that make up the genetic material of chromosomes. It is essential for packaging DNA, regulation of gene expression, DNA replication and repair. The audience for this book includes ...}, publisher = {Springer}, editor = {Workman, Jerry L. and Abmayr, Susan M.}, author = {McGinty, Robert K. and Tan, Song}, year = {2014} } @article{cirillo_opening_2002, title = {Opening of {Compacted} {Chromatin} by {Early} {Developmental} {Transcription} {Factors} {HNF}3 ({FoxA}) and {GATA}-4}, volume = {9}, issn = {1097-2765}, url = {http://www.sciencedirect.com/science/article/pii/S1097276502004598}, doi = {10.1016/S1097-2765(02)00459-8}, abstract = {The transcription factors HNF3 (FoxA) and GATA-4 are the earliest known to bind the albumin gene enhancer in liver precursor cells in embryos. To understand how they access sites in silent chromatin, we assembled nucleosome arrays containing albumin enhancer sequences and compacted them with linker histone.
HNF3 and GATA-4, but not NF-1, C/EBP, and GAL4-AH, bound their sites in compacted chromatin and opened the local nucleosomal domain in the absence of ATP-dependent enzymes. The ability of HNF3 to open chromatin is mediated by a high affinity DNA binding site and by the C-terminal domain of the protein, which binds histones H3 and H4. Thus, factors that potentiate transcription in development are inherently capable of initiating chromatin opening events.}, number = {2}, urldate = {2016-12-21}, journal = {Molecular Cell}, author = {Cirillo, Lisa Ann and Lin, Frank Robert and Cuesta, Isabel and Friedman, Dara and Jarnik, Michal and Zaret, Kenneth S}, month = feb, year = {2002}, pages = {279--289}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ZWHQ5CF9/Cirillo et al. - 2002 - Opening of Compacted Chromatin by Early Developmen.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ZXQVJ2PE/S1097276502004598.html:text/html} } @article{cirillo_early_1999, title = {An {Early} {Developmental} {Transcription} {Factor} {Complex} that {Is} {More} {Stable} on {Nucleosome} {Core} {Particles} {Than} on {Free} {DNA}}, volume = {4}, issn = {1097-2765}, url = {http://www.sciencedirect.com/science/article/pii/S1097276500802257}, doi = {10.1016/S1097-2765(00)80225-7}, abstract = {In vivo footprinting studies have shown that transcription factor binding sites for HNF3 and GATA-4 are occupied on the albumin gene enhancer in embryonic endoderm, prior to the developmental activation of liver gene transcription. We have investigated how these factors can stably occupy silent chromatin. Remarkably, we find that HNF3, but not GATA-4 or a GAL4 control protein, binds far more stably to nucleosome core particles than to free DNA. In the presence of HNF3, GATA-4 binds stably to an HNF3-positioned nucleosome. Histone acetylation does not affect HNF3 binding. 
This is evidence for stable nucleosome binding by a transcription factor and shows that a winged helix protein is sufficient to initiate the assembly of an enhancer complex on nonacetylated nucleosomes.}, number = {6}, urldate = {2016-12-21}, journal = {Molecular Cell}, author = {Cirillo, Lisa Ann and Zaret, Kenneth S}, month = dec, year = {1999}, pages = {961--969}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/P56D6XMZ/Cirillo and Zaret - 1999 - An Early Developmental Transcription Factor Comple.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XI82F8KX/S1097276500802257.html:text/html} } @article{sekiya_direct_2011, title = {Direct conversion of mouse fibroblasts to hepatocyte-like cells by defined factors}, volume = {475}, copyright = {© 2011 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v475/n7356/abs/nature10263.html}, doi = {10.1038/nature10263}, abstract = {The location and timing of cellular differentiation must be stringently controlled for proper organ formation. Normally, hepatocytes differentiate from hepatic progenitor cells to form the liver during development. However, previous studies have shown that the hepatic program can also be activated in non-hepatic lineage cells after exposure to particular stimuli or fusion with hepatocytes. These unexpected findings suggest that factors critical to hepatocyte differentiation exist and become activated to induce hepatocyte-specific properties in different cell types. Here, by screening the effects of twelve candidate factors, we identify three specific combinations of two transcription factors, comprising Hnf4α plus Foxa1, Foxa2 or Foxa3, that can convert mouse embryonic and adult fibroblasts into cells that closely resemble hepatocytes in vitro. 
The induced hepatocyte-like (iHep) cells have multiple hepatocyte-specific features and reconstitute damaged hepatic tissues after transplantation. The generation of iHep cells may provide insights into the molecular nature of hepatocyte differentiation and potential therapies for liver diseases.}, language = {en}, number = {7356}, urldate = {2016-12-21}, journal = {Nature}, author = {Sekiya, Sayaka and Suzuki, Atsushi}, month = jul, year = {2011}, keywords = {biotechnology, medical research}, pages = {390--393}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QXVHEIQK/Sekiya and Suzuki - 2011 - Direct conversion of mouse fibroblasts to hepatocy.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/VXXU93UV/nature10263.html:text/html} } @article{huang_induction_2011, title = {Induction of functional hepatocyte-like cells from mouse fibroblasts by defined factors}, volume = {475}, copyright = {© 2011 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v475/n7356/full/nature10116.html}, doi = {10.1038/nature10116}, abstract = {The generation of functional hepatocytes independent of donor liver organs is of great therapeutic interest with regard to regenerative medicine and possible cures for liver disease. Induced hepatic differentiation has been achieved previously using embryonic stem cells or induced pluripotent stem cells. Particularly, hepatocytes generated from a patient's own induced pluripotent stem cells could theoretically avoid immunological rejection. However, the induction of hepatocytes from induced pluripotent stem cells is a complicated process that would probably be replaced with the arrival of improved technology.
Overexpression of lineage-specific transcription factors directly converts terminally differentiated cells into some other lineages, including neurons, cardiomyocytes and blood progenitors; however, it remains unclear whether these lineage-converted cells could repair damaged tissues in vivo. Here we demonstrate the direct induction of functional hepatocyte-like (iHep) cells from mouse tail-tip fibroblasts by transduction of Gata4, Hnf1α and Foxa3, and inactivation of p19Arf. iHep cells show typical epithelial morphology, express hepatic genes and acquire hepatocyte functions. Notably, transplanted iHep cells repopulate the livers of fumarylacetoacetate-hydrolase-deficient (Fah-/-) mice and rescue almost half of recipients from death by restoring liver functions. Our study provides a novel strategy to generate functional hepatocyte-like cells for the purpose of liver engineering and regenerative medicine.}, language = {en}, number = {7356}, urldate = {2016-12-21}, journal = {Nature}, author = {Huang, Pengyu and He, Zhiying and Ji, Shuyi and Sun, Huawang and Xiang, Dao and Liu, Changcheng and Hu, Yiping and Wang, Xin and Hui, Lijian}, month = jul, year = {2011}, keywords = {stem cells, Cell Biology}, pages = {386--389}, file = {Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/G2NSW4FG/nature10116.html:text/html} } @article{lawrence_expectation_1990, title = {An expectation maximization ({EM}) algorithm for the identification and characterization of common sites in unaligned biopolymer sequences}, volume = {7}, issn = {0887-3585}, doi = {10.1002/prot.340070105}, abstract = {Statistical methodology for the identification and characterization of protein binding sites in a set of unaligned DNA fragments is presented. Each sequence must contain at least one common site. No alignment of the sites is required. 
Instead, the uncertainty in the location of the sites is handled by employing the missing information principle to develop an "expectation maximization" (EM) algorithm. This approach allows for the simultaneous identification of the sites and characterization of the binding motifs. The reliability of the algorithm increases with the number of fragments, but the computations increase only linearly. The method is illustrated with an example, using known cyclic adenosine monophosphate receptor protein (CRP) binding sites. The final motif is utilized in a search for undiscovered CRP binding sites.}, language = {eng}, number = {1}, journal = {Proteins}, author = {Lawrence, C. E. and Reilly, A. A.}, year = {1990}, pmid = {2184437}, keywords = {Algorithms, DNA-binding proteins, Base Sequence, Binding Sites, Escherichia coli, Information Systems, Molecular Sequence Data, Nucleic Acid Conformation, Receptors, Cyclic AMP, Statistics as Topic}, pages = {41--51}, file = {Lawrence90.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Lawrence90.pdf:application/pdf} } @article{schneider_sequence_1990, title = {Sequence logos: a new way to display consensus sequences}, volume = {18}, issn = {0305-1048, 1362-4962}, shorttitle = {Sequence logos}, url = {http://nar.oxfordjournals.org/content/18/20/6097}, doi = {10.1093/nar/18.20.6097}, abstract = {A graphical method is presented for displaying the patterns in a set of aligned sequences. The characters representing the sequence are stacked on top of each other for each position in the aligned sequences. The height of each letter is made proportional to Its frequency, and the letters are sorted so the most common one is on top. The height of the entire stack is then adjusted to signify the information content of the sequences at that position. 
From these ‘sequence logos’, one can determine not only the consensus sequence but also the relative frequency of bases and the information content (measured In bits) at every position in a site or sequence. The logo displays both significant residues and subtle sequence patterns.}, language = {en}, number = {20}, urldate = {2016-12-23}, journal = {Nucleic Acids Research}, author = {Schneider, Thomas D. and Stephens, R. Michael}, month = oct, year = {1990}, pmid = {2172928}, pages = {6097--6100}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/F4MJBIVJ/Schneider and Stephens - 1990 - Sequence logos a new way to display consensus seq.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XPZMIRWU/6097.html:text/html} } @article{stormo_dna_2000, title = {{DNA} binding sites: representation and discovery}, volume = {16}, issn = {1367-4803, 1460-2059}, shorttitle = {{DNA} binding sites}, url = {http://bioinformatics.oxfordjournals.org/content/16/1/16}, doi = {10.1093/bioinformatics/16.1.16}, language = {en}, number = {1}, urldate = {2016-12-27}, journal = {Bioinformatics}, author = {Stormo, Gary D.}, month = jan, year = {2000}, pmid = {10812473}, pages = {16--23}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BWUSTADT/Stormo - 2000 - DNA binding sites representation and discovery.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/DEDTEW2G/16.html:text/html} } @article{whitington_inferring_2011, title = {Inferring transcription factor complexes from {ChIP}-seq data}, volume = {39}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/39/15/e98}, doi = {10.1093/nar/gkr341}, abstract = {Chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-seq) allows researchers to determine the genome-wide binding locations of individual transcription factors (TFs) at high resolution. 
This information can be interrogated to study various aspects of TF behaviour, including the mechanisms that control TF binding. Physical interaction between TFs comprises one important aspect of TF binding in eukaryotes, mediating tissue-specific gene expression. We have developed an algorithm, spaced motif analysis (SpaMo), which is able to infer physical interactions between the given TF and TFs bound at neighbouring sites at the DNA interface. The algorithm predicts TF interactions in half of the ChIP-seq data sets we test, with the majority of these predictions supported by direct evidence from the literature or evidence of homodimerization. High resolution motif spacing information obtained by this method can facilitate an improved understanding of individual TF complex structures. SpaMo can assist researchers in extracting maximum information relating to binding mechanisms from their TF ChIP-seq data. SpaMo is available for download and interactive use as part of the MEME Suite (http://meme.nbcr.net).}, language = {en}, number = {15}, urldate = {2016-12-28}, journal = {Nucleic Acids Research}, author = {Whitington, Tom and Frith, Martin C. and Johnson, James and Bailey, Timothy L.}, month = aug, year = {2011}, pmid = {21602262}, pages = {e98--e98}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BPH3Q5R4/Whitington et al. 
- 2011 - Inferring transcription factor complexes from ChIP.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TZPCKMJ3/e98.html:text/html} } @article{wolberger_multiprotein-dna_1999, title = {Multiprotein-{DNA} {Complexes} in {Transcriptional} {Regulation}}, volume = {28}, url = {http://dx.doi.org/10.1146/annurev.biophys.28.1.29}, doi = {10.1146/annurev.biophys.28.1.29}, abstract = {Transcription in eukaryotes is frequently regulated by a mechanism termed combinatorial control, whereby several different proteins must bind DNA in concert to achieve appropriate regulation of the downstream gene. X-ray crystallographic studies of multiprotein complexes bound to DNA have been carried out to investigate the molecular determinants of complex assembly and DNA binding. This work has provided important insights into the specific protein-protein and protein-DNA interactions that govern the assembly of multiprotein regulatory complexes. The results of these studies are reviewed here, and the general insights into the mechanism of combinatorial gene regulation are discussed.}, number = {1}, urldate = {2016-12-28}, journal = {Annual Review of Biophysics and Biomolecular Structure}, author = {Wolberger, Cynthia}, year = {1999}, pmid = {10410794}, keywords = {Gene regulation, transcription, protein-DNA interactions, crystal structure, combinatorial control}, pages = {29--56}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UBG5A8J8/Wolberger - 1999 - Multiprotein-Dna Complexes in Transcriptional Regu.pdf:application/pdf} } @article{ravasi_atlas_2010, title = {An {Atlas} of {Combinatorial} {Transcriptional} {Regulation} in {Mouse} and {Man}}, volume = {140}, issn = {0092-8674}, url = {http://www.sciencedirect.com/science/article/pii/S0092867410000796}, doi = {10.1016/j.cell.2010.01.044}, abstract = {Summary Combinatorial interactions among transcription factors are critical to directing tissue-specific gene
expression. To build a global atlas of these combinations, we have screened for physical interactions among the majority of human and mouse DNA-binding transcription factors (TFs). The complete networks contain 762 human and 877 mouse interactions. Analysis of the networks reveals that highly connected TFs are broadly expressed across tissues, and that roughly half of the measured interactions are conserved between mouse and human. The data highlight the importance of TF combinations for determining cell fate, and they lead to the identification of a SMAD3/FLI1 complex expressed during development of immunity. The availability of large TF combinatorial networks in both human and mouse will provide many opportunities to study gene regulation, tissue differentiation, and mammalian evolution.}, number = {5}, urldate = {2016-12-28}, journal = {Cell}, author = {Ravasi, Timothy and Suzuki, Harukazu and Cannistraci, Carlo Vittorio and Katayama, Shintaro and Bajic, Vladimir B. and Tan, Kai and Akalin, Altuna and Schmeier, Sebastian and Kanamori-Katayama, Mutsumi and Bertin, Nicolas and Carninci, Piero and Daub, Carsten O. and Forrest, Alistair R. R. and Gough, Julian and Grimmond, Sean and Han, Jung-Hoon and Hashimoto, Takehiro and Hide, Winston and Hofmann, Oliver and Kamburov, Atanas and Kaur, Mandeep and Kawaji, Hideya and Kubosaki, Atsutaka and Lassmann, Timo and van Nimwegen, Erik and MacPherson, Cameron Ross and Ogawa, Chihiro and Radovanovic, Aleksandar and Schwartz, Ariel and Teasdale, Rohan D. and Tegnér, Jesper and Lenhard, Boris and Teichmann, Sarah A. and Arakawa, Takahiro and Ninomiya, Noriko and Murakami, Kayoko and Tagami, Michihira and Fukuda, Shiro and Imamura, Kengo and Kai, Chikatoshi and Ishihara, Ryoko and Kitazume, Yayoi and Kawai, Jun and Hume, David A. 
and Ideker, Trey and Hayashizaki, Yoshihide}, month = mar, year = {2010}, keywords = {DNA}, pages = {744--752}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SHBZTQ2U/Ravasi et al. - 2010 - An Atlas of Combinatorial Transcriptional Regulati.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ZEKZPIRS/S0092867410000796.html:text/html} } @book{marsland_machine_2015, address = {Boca Raton}, edition = {Second Edition}, title = {Machine {Learning}, {An} algorithmic {Perspective}, {Chapter} 3 {Neurons}, {Neural} {Network} and {Linear} {Discriminants}}, isbn = {978-1-4398-8921-3}, url = {https://www.crcpress.com/Machine-Learning-An-Algorithmic-Perspective/Marsland/p/book/9781439889213}, publisher = {CRC Press}, author = {Marsland, Stephen}, year = {2015} } @article{hertz_identification_1990, title = {Identification of consensus patterns in unaligned {DNA} sequences known to be functionally related}, volume = {6}, issn = {0266-7061}, abstract = {We have developed a method for identifying consensus patterns in a set of unaligned DNA sequences known to bind a common protein or to have some other common biochemical function. The method is based on a matrix representation of binding site patterns. Each row of the matrix represents one of the four possible bases, each column represents one of the positions of the binding site and each element is determined by the frequency the indicated base occurs at the indicated position. The goal of the method is to find the most significant matrix--i.e. the one with the lowest probability of occurring by chance--out of all the matrices that can be formed from the set of related sequences. The reliability of the method improves with the number of sequences, while the time required increases only linearly with the number of sequences. 
To test this method, we analysed 11 DNA sequences containing promoters regulated by the Escherichia coli LexA protein. The matrices we found were consistent with the known consensus sequence, and could distinguish the generally accepted LexA binding sites from other DNA sequences.}, language = {eng}, number = {2}, journal = {Computer applications in the biosciences: CABIOS}, author = {Hertz, G. Z. and Hartzell, G. W. and Stormo, G. D.}, month = apr, year = {1990}, pmid = {2193692}, keywords = {DNA, Algorithms, Base Sequence, Binding Sites, Escherichia coli, Molecular Sequence Data, Bacterial Proteins, DNA, Bacterial, Genes, Bacterial, Pattern Recognition, Automated, Serine Endopeptidases, Software}, pages = {81--92}, file = {Hertz90.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Hertz90.pdf:application/pdf} } @article{shi_transcriptional_1991, title = {Transcriptional repression by {YY}1, a human {GLI}-{Krüppel}-related protein, and relief of repression by adenovirus {E}1A protein}, volume = {67}, issn = {0092-8674}, url = {http://www.sciencedirect.com/science/article/pii/0092867491901896}, doi = {10.1016/0092-8674(91)90189-6}, abstract = {A sequence within the transcription control region of the adeno-associated virus P5 promoter has been shown to mediate transcriptional activation by the adenovirus E1A protein. We report here that this same element mediates transcriptional repression in the absence of E1A. Two cellular proteins have been found to bind to overlapping regions within this sequence element. One of these proteins, YY1, is responsible for the repression. E1A relieves repression exerted by YY1 and further activates transcription through its binding site. A YY1-specific cDNA has been isolated. Its sequence reveals YY1 to be a zinc finger protein that belongs to the GLI-Krüppel gene family. The product of the cDNA binds to YY1 sites.
When fused to the GAL4 DNA-binding domain, it is capable of repressing transcription directed by a promoter that contains GAL4-binding sites, and E1A proteins can relieve the repression and activate transcription through the fusion protein.}, number = {2}, urldate = {2017-01-04}, journal = {Cell}, author = {Shi, Yang and Seto, Edward and Chang, Long-Sheng and Shenk, Thomas}, month = oct, year = {1991}, pages = {377--388}, file = {ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RSE2UKDF/0092867491901896.html:text/html;Shi et al 1991.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RSE2UKDF/Shi et al 1991.pdf:application/pdf} } @article{schones_dynamic_2008, title = {Dynamic {Regulation} of {Nucleosome} {Positioning} in the {Human} {Genome}}, volume = {132}, issn = {0092-8674}, url = {http://www.sciencedirect.com/science/article/pii/S0092867408002705}, doi = {10.1016/j.cell.2008.02.022}, abstract = {Summary The positioning of nucleosomes with respect to DNA plays an important role in regulating transcription. However, nucleosome mapping has been performed for only limited genomic regions in humans. We have generated genome-wide maps of nucleosome positions in both resting and activated human CD4+ T cells by direct sequencing of nucleosome ends using the Solexa high-throughput sequencing technique. We find that nucleosome phasing relative to the transcription start sites is directly correlated to RNA polymerase II (Pol II) binding. Furthermore, the first nucleosome downstream of a start site exhibits differential positioning in active and silent genes. TCR signaling induces extensive nucleosome reorganization in promoters and enhancers to allow transcriptional activation or repression. Our results suggest that H2A.Z-containing and modified nucleosomes are preferentially lost from the −1 nucleosome position.
Our data provide a comprehensive view of the nucleosome landscape and its dynamic regulation in the human genome.}, number = {5}, urldate = {2017-01-05}, journal = {Cell}, author = {Schones, Dustin E. and Cui, Kairong and Cuddapah, Suresh and Roh, Tae-Young and Barski, Artem and Wang, Zhibin and Wei, Gang and Zhao, Keji}, month = mar, year = {2008}, keywords = {DNA, SIGNALING}, pages = {887--898}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/C63W6BPF/Schones et al. - 2008 - Dynamic Regulation of Nucleosome Positioning in th.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/U7WWVSTV/S0092867408002705.html:text/html} } @article{dreos_eukaryotic_2017, title = {The eukaryotic promoter database in its 30th year: focus on non-vertebrate organisms}, volume = {45}, issn = {0305-1048, 1362-4962}, shorttitle = {The eukaryotic promoter database in its 30th year}, url = {http://nar.oxfordjournals.org/content/45/D1/D51}, doi = {10.1093/nar/gkw1069}, abstract = {We present an update of the Eukaryotic Promoter Database EPD (http://epd.vital-it.ch), more specifically on the EPDnew division, which contains comprehensive organisms-specific transcription start site (TSS) collections automatically derived from next generation sequencing (NGS) data. Thanks to the abundant release of new high-throughput transcript mapping data (CAGE, TSS-seq, GRO-cap) the database could be extended to plant and fungal species. We further report on the expansion of the mass genome annotation (MGA) repository containing promoter-relevant chromatin profiling data and on improvements for the EPD entry viewers. 
Finally, we present a new data access tool, ChIP-Extract, which enables computational biologists to extract diverse types of promoter-associated data in numerical table formats that are readily imported into statistical analysis platforms such as R.}, language = {en}, number = {D1}, urldate = {2017-01-05}, journal = {Nucleic Acids Research}, author = {Dreos, René and Ambrosini, Giovanna and Groux, Romain and Cavin Périer, Rouaïda and Bucher, Philipp}, month = jan, year = {2017}, pmid = {27899657}, pages = {D51--D55}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/JMIFBMU3/Dreos et al. - 2017 - The eukaryotic promoter database in its 30th year.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/NESWBKE4/D51.html:text/html} } @article{shlyueva_transcriptional_2014, title = {Transcriptional enhancers: from properties to genome-wide predictions}, volume = {15}, copyright = {© 2014 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1471-0056}, shorttitle = {Transcriptional enhancers}, url = {http://www.nature.com/nrg/journal/v15/n4/full/nrg3682.html}, doi = {10.1038/nrg3682}, abstract = {Cellular development, morphology and function are governed by precise patterns of gene expression. These are established by the coordinated action of genomic regulatory elements known as enhancers or cis-regulatory modules. More than 30 years after the initial discovery of enhancers, many of their properties have been elucidated; however, despite major efforts, we only have an incomplete picture of enhancers in animal genomes. In this Review, we discuss how properties of enhancer sequences and chromatin are used to predict enhancers in genome-wide studies. We also cover recently developed high-throughput methods that allow the direct testing and identification of enhancers on the basis of their activity. 
Finally, we discuss recent technological advances and current challenges in the field of regulatory genomics.}, language = {en}, number = {4}, urldate = {2017-01-06}, journal = {Nature Reviews Genetics}, author = {Shlyueva, Daria and Stampfel, Gerald and Stark, Alexander}, month = apr, year = {2014}, keywords = {Chromatin, Gene regulation, Transcriptional regulatory elements, Regulatory networks}, pages = {272--286}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/PPV2MDTW/Shlyueva et al. - 2014 - Transcriptional enhancers from properties to geno.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5QXIK7V9/nrg3682.html:text/html} } @article{fu_insulator_2008, title = {The {Insulator} {Binding} {Protein} {CTCF} {Positions} 20 {Nucleosomes} around {Its} {Binding} {Sites} across the {Human} {Genome}}, volume = {4}, issn = {1553-7404}, url = {http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1000138}, doi = {10.1371/journal.pgen.1000138}, abstract = {Author Summary The accessibility of genomic DNA to regulatory proteins and to the transcriptional machinery plays an important role in eukaryotic transcription regulation. Some regulatory proteins alter chromatin structures by evicting histones in selected loci. Nonetheless, no regulatory proteins have been reported to position nucleosomes genome-wide. The only genomic landmark that has been associated with well-positioned nucleosomes is the transcriptional start site (TSS)—several well-positioned nucleosomes are observed downstream of TSS genome-wide. Here we report that the CCCTC-binding factor (CTCF), a protein that binds insulator elements to prevent the spreading of heterochromatin and restricting transcriptional enhancers from activating unrelated promoters, possesses greater ability to position nucleosomes across the human genome than does the TSS. 
These well-positioned nucleosomes are highly enriched in a histone variant H2A.Z and 11 histone modifications. The nucleosomes enriched in the histone modifications previously implicated to correlate with active transcription tend to have less protected DNA against digestion by micrococcal nuclease, or greater DNA accessibility. This nucleosome-positioning ability is likely unique to CTCF, because it was not found in the other transcriptional factors we investigated. Thus we suggest that the binding of CTCF provides an anchor for positioning nucleosomes, and chromatin remodeling is an important aspect of CTCF function.}, number = {7}, urldate = {2017-01-10}, journal = {PLOS Genetics}, author = {Fu, Yutao and Sinha, Manisha and Peterson, Craig L. and Weng, Zhiping}, year = {2008}, keywords = {Chromatin, Transcription Factors, Nucleosomes, Histones, Nucleosome mapping, Histone modification, Insulators, DNA transcription}, pages = {e1000138}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GB4N7TQG/Fu et al. - 2008 - The Insulator Binding Protein CTCF Positions 20 Nu.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4KZ29XEU/article.html:text/html} } @article{boeva_novo_2010, title = {De novo motif identification improves the accuracy of predicting transcription factor binding sites in {ChIP}-{Seq} data analysis}, volume = {38}, issn = {0305-1048, 1362-4962}, url = {http://nar.oxfordjournals.org/content/38/11/e126}, doi = {10.1093/nar/gkq217}, abstract = {Dramatic progress in the development of next-generation sequencing technologies has enabled accurate genome-wide characterization of the binding sites of DNA-associated proteins. This technique, baptized as ChIP-Seq, uses a combination of chromatin immunoprecipitation and massively parallel DNA sequencing. Other published tools that predict binding sites from ChIP-Seq data use only positional information of mapped reads. 
In contrast, our algorithm MICSA (Motif Identification for ChIP-Seq Analysis) combines this source of positional information with information on motif occurrences to better predict binding sites of transcription factors (TFs). We proved the greater accuracy of MICSA with respect to several other tools by running them on datasets for the TFs NRSF, GABP, STAT1 and CTCF. We also applied MICSA on a dataset for the oncogenic TF EWS-FLI1. We discovered {\textgreater}2000 binding sites and two functionally different binding motifs. We observed that EWS-FLI1 can activate gene transcription when (i) its binding site is located in close proximity to the gene transcription start site (up to ∼150 kb), and (ii) it contains a microsatellite sequence. Furthermore, we observed that sites without microsatellites can also induce regulation of gene expression—positively as often as negatively—and at much larger distances (up to ∼1 Mb).}, language = {en}, number = {11}, urldate = {2017-01-11}, journal = {Nucleic Acids Research}, author = {Boeva, Valentina and Surdez, Didier and Guillon, Noëlle and Tirode, Franck and Fejes, Anthony P. and Delattre, Olivier and Barillot, Emmanuel}, month = jun, year = {2010}, pmid = {20375099}, pages = {e126--e126}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9CBMJ2JP/Boeva et al. 
- 2010 - De novo motif identification improves the accuracy.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/W6RIZBBF/e126.html:text/html} } @article{boeva_nebulaweb_server_2012, title = {Nebula—a web-server for advanced {ChIP}-seq data analysis}, volume = {28}, issn = {1367-4803}, url = {https://academic.oup.com/bioinformatics/article/28/19/2517/288900/Nebula-a-web-server-for-advanced-ChIP-seq-data}, doi = {10.1093/bioinformatics/bts463}, number = {19}, urldate = {2017-01-19}, journal = {Bioinformatics}, author = {Boeva, Valentina and Lermine, Alban and Barette, Camille and Guillouf, Christel and Barillot, Emmanuel}, month = oct, year = {2012}, pages = {2517--2519}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/H9ZFAN5U/Boeva et al. - 2012 - Nebula—a web-server for advanced ChIP-seq data ana.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/T5I9WCVM/Nebula-a-web-server-for-advanced-ChIP-seq-data.html:text/html} } @article{ambrosini_chip-seq_2016, title = {The {ChIP}-{Seq} tools and web server: a resource for analyzing {ChIP}-seq and other types of genomic data}, volume = {17}, issn = {1471-2164}, shorttitle = {The {ChIP}-{Seq} tools and web server}, url = {http://dx.doi.org/10.1186/s12864-016-3288-8}, doi = {10.1186/s12864-016-3288-8}, abstract = {ChIP-seq and related high-throughput chromatin profiling assays generate ever increasing volumes of highly valuable biological data.
To make sense out of it, biologists need versatile, efficient and user-friendly tools for access, visualization and integrative analysis of such data.}, urldate = {2017-01-25}, journal = {BMC Genomics}, author = {Ambrosini, Giovanna and Dreos, René and Kumar, Sunil and Bucher, Philipp}, year = {2016}, keywords = {transcription factor binding sites, ChIP-seq data analysis, Bioinformatics resources, Web server, Peak finding, Genomic context analysis, Histone modifications, DNA sequence motifs}, pages = {938}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3TBE7A76/Ambrosini et al. - 2016 - The ChIP-Seq tools and web server a resource for .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KQWCAIV7/s12864-016-3288-8.html:text/html} } @article{singer_ciita_2013, title = {{CIITA} and {Its} {Dual} {Roles} in {MHC} {Gene} {Transcription}}, volume = {4}, issn = {1664-3224}, url = {http://journal.frontiersin.org/article/10.3389/fimmu.2013.00476/abstract}, doi = {10.3389/fimmu.2013.00476}, abstract = {CIITA is a transcriptional co-activator that regulates γ-interferon-activated transcription of Major Histocompatibility Complex (MHC) class I and class II genes. As such, it plays a critical role in immune responses: CIITA deficiency results in aberrant MHC gene expression and consequently in autoimmune diseases such as Type II bare lymphocyte syndrome. Although CIITA does not bind DNA directly, it regulates MHC transcription in two distinct ways– as a transcriptional activator and as a general transcription factor. As an activator, CIITA nucleates an enhanceosome consisting of the DNA binding transcription factors RFX, CREB and NF-Y. As a general transcription factor, CIITA functionally replaces the TFIID component, TAF1. Like TAF1, CIITA possesses acetyltransferase (AT) and kinase activities, both of which contribute to proper transcription of MHC class I and II genes.
The substrate specificity and regulation of the CIITA AT and kinase activities also parallel those of TAF1. In addition, CIITA is tightly regulated by its various regulatory domains that undergo phosphorylation and influence its targeted localization. Thus, a complex picture of the mechanisms regulating CIITA function is emerging suggesting that CIITA has dual roles in transcriptional regulation which are summarized in this review.}, language = {English}, urldate = {2017-01-30}, journal = {Frontiers in Immunology}, author = {Singer, Dinah S. and Devaiah, Ballachanda Nanjappa}, year = {2013}, keywords = {CIITA, MHC transcription, TAF1, enhanceosome, general transcription factors, NLRCATERPILLER}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Q6JDIC2G/Singer et Devaiah - 2013 - CIITA and Its Dual Roles in MHC Gene Transcription.pdf:application/pdf} } @article{vo_creb-binding_2001, title = {{CREB}-binding {Protein} and p300 in {Transcriptional} {Regulation}}, volume = {276}, issn = {0021-9258, 1083-351X}, url = {http://www.jbc.org/content/276/17/13505}, doi = {10.1074/jbc.R000025200}, language = {en}, number = {17}, urldate = {2017-01-30}, journal = {Journal of Biological Chemistry}, author = {Vo, Ngan and Goodman, Richard H.}, month = apr, year = {2001}, pmid = {11279224}, pages = {13505--13508}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/E88QGRXZ/Vo et Goodman - 2001 - CREB-binding Protein and p300 in Transcriptional R.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GAJPFEN3/13505.html:text/html} } @article{bailey_fitting_1994, title = {Fitting a mixture model by expectation maximization to discover motifs in biopolymers}, volume = {2}, issn = {1553-0833}, abstract = {The algorithm described in this paper discovers one or more motifs in a collection of DNA or protein sequences by using the technique of expectation maximization to fit a two-component 
finite mixture model to the set of sequences. Multiple motifs are found by fitting a mixture model to the data, probabilistically erasing the occurrences of the motif thus found, and repeating the process to find successive motifs. The algorithm requires only a set of unaligned sequences and a number specifying the width of the motifs as input. It returns a model of each motif and a threshold which together can be used as a Bayes-optimal classifier for searching for occurrences of the motif in other databases. The algorithm estimates how many times each motif occurs in each sequence in the dataset and outputs an alignment of the occurrences of the motif. The algorithm is capable of discovering several different motifs with differing numbers of occurrences in a single dataset.}, language = {eng}, journal = {Proceedings. International Conference on Intelligent Systems for Molecular Biology}, author = {Bailey, T. L. and Elkan, C.}, year = {1994}, pmid = {7584402}, keywords = {Animals, Humans, Algorithms, Sequence analysis, Biopolymers, Models, Theoretical}, pages = {28--36}, file = {Bailey_Elkan94.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Bailey_Elkan94.pdf:application/pdf} } @article{isakova_smile-seq_2017, title = {{SMiLE}-seq identifies binding motifs of single and dimeric transcription factors}, volume = {advance online publication}, copyright = {© 2017 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1548-7091}, url = {http://www.nature.com/nmeth/journal/vaop/ncurrent/full/nmeth.4143.html}, doi = {10.1038/nmeth.4143}, abstract = {Resolving the DNA-binding specificities of transcription factors (TFs) is of critical value for understanding gene regulation. Here, we present a novel, semiautomated protein–DNA interaction characterization technology, selective microfluidics-based ligand enrichment followed by sequencing (SMiLE-seq). 
SMiLE-seq is neither limited by DNA bait length nor biased toward strong affinity binders; it probes the DNA-binding properties of TFs over a wide affinity range in a fast and cost-effective fashion. We validated SMiLE-seq by analyzing 58 full-length human, mouse, and Drosophila TFs from distinct structural classes. All tested TFs yielded DNA-binding models with predictive power comparable to or greater than that of other in vitro assays. De novo motif discovery on all JUN–FOS heterodimers and several nuclear receptor-TF complexes provided novel insights into partner-specific heterodimer DNA-binding preferences. We also successfully analyzed the DNA-binding properties of uncharacterized human C2H2 zinc-finger proteins and validated several using ChIP-exo.}, language = {en}, urldate = {2017-01-31}, journal = {Nature Methods}, author = {Isakova, Alina and Groux, Romain and Imbeault, Michael and Rainer, Pernille and Alpern, Daniel and Dainese, Riccardo and Ambrosini, Giovanna and Trono, Didier and Bucher, Philipp and Deplancke, Bart}, month = jan, year = {2017}, keywords = {Genomic analysis, Lab-on-a-chip}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/JBXIE44F/Isakova et al. 
- 2017 - SMiLE-seq identifies binding motifs of single and .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/EP3KB63D/nmeth.4143.html:text/html} } @book{marsland_machine_2015-1, address = {Boca Raton}, edition = {Second Edition}, title = {Machine {Learning}, {An} algorithmic {Perspective}, {Chapter} 7 {Probabilistic} {Learning}}, isbn = {978-1-4398-8921-3}, url = {https://www.crcpress.com/Machine-Learning-An-Algorithmic-Perspective/Marsland/p/book/9781439889213}, publisher = {CRC Press}, author = {Marsland, Stephen}, year = {2015} } @article{berger_universal_2009, title = {Universal protein-binding microarrays for the comprehensive characterization of the {DNA}-binding specificities of transcription factors}, volume = {4}, copyright = {© 2009 Nature Publishing Group}, issn = {1754-2189}, url = {http://www.nature.com/nprot/journal/v4/n3/full/nprot.2008.195.html}, doi = {10.1038/nprot.2008.195}, abstract = {Protein-binding microarray (PBM) technology provides a rapid, high-throughput means of characterizing the in vitro DNA-binding specificities of transcription factors (TFs). Using high-density, custom-designed microarrays containing all 10-mer sequence variants, one can obtain comprehensive binding-site measurements for any TF, regardless of its structural class or species of origin. Here, we present a protocol for the examination and analysis of TF-binding specificities at high resolution using such 'all 10-mer' universal PBMs. This procedure involves double-stranding a commercially synthesized DNA oligonucleotide array, binding a TF directly to the double-stranded DNA microarray and labeling the protein-bound microarray with a fluorophore-conjugated antibody. We describe how to computationally extract the relative binding preferences of the examined TF for all possible contiguous and gapped 8-mers over the full range of affinities, from highest affinity sites to nonspecific sites. 
Multiple proteins can be tested in parallel in separate chambers on a single microarray, enabling the processing of a dozen or more TFs in a single day.}, language = {en}, number = {3}, urldate = {2017-02-03}, journal = {Nature Protocols}, author = {Berger, Michael F. and Bulyk, Martha L.}, month = mar, year = {2009}, pages = {393--411}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/34HD9HV2/Berger et Bulyk - 2009 - Universal protein-binding microarrays for the comp.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5EJI7WIQ/nprot.2008.195.html:text/html} } @article{jolma_multiplexed_2010, title = {Multiplexed massively parallel {SELEX} for characterization of human transcription factor binding specificities}, volume = {20}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/20/6/861}, doi = {10.1101/gr.100552.109}, abstract = {The genetic code—the binding specificity of all transfer-RNAs—defines how protein primary structure is determined by DNA sequence. DNA also dictates when and where proteins are expressed, and this information is encoded in a pattern of specific sequence motifs that are recognized by transcription factors. However, the DNA-binding specificity is only known for a small fraction of the ∼1400 human transcription factors (TFs). We describe here a high-throughput method for analyzing transcription factor binding specificity that is based on systematic evolution of ligands by exponential enrichment (SELEX) and massively parallel sequencing. The method is optimized for analysis of large numbers of TFs in parallel through the use of affinity-tagged proteins, barcoded selection oligonucleotides, and multiplexed sequencing. Data are analyzed by a new bioinformatic platform that uses the hundreds of thousands of sequencing reads obtained to control the quality of the experiments and to generate binding motifs for the TFs. 
The described technology allows higher throughput and identification of much longer binding profiles than current microarray-based methods. In addition, as our method is based on proteins expressed in mammalian cells, it can also be used to characterize DNA-binding preferences of full-length proteins or proteins requiring post-translational modifications. We validate the method by determining binding specificities of 14 different classes of TFs and by confirming the specificities for NFATC1 and RFX3 using ChIP-seq. Our results reveal unexpected dimeric modes of binding for several factors that were thought to preferentially bind DNA as monomers.}, language = {en}, number = {6}, urldate = {2017-02-03}, journal = {Genome Research}, author = {Jolma, Arttu and Kivioja, Teemu and Toivonen, Jarkko and Cheng, Lu and Wei, Gonghong and Enge, Martin and Taipale, Mikko and Vaquerizas, Juan M. and Yan, Jian and Sillanpää, Mikko J. and Bonke, Martin and Palin, Kimmo and Talukder, Shaheynoor and Hughes, Timothy R. and Luscombe, Nicholas M. and Ukkonen, Esko and Taipale, Jussi}, month = jun, year = {2010}, pmid = {20378718}, pages = {861--873}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FJUK62JA/Jolma et al. - 2010 - Multiplexed massively parallel SELEX for character.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MDXB4KDB/861.html:text/html} } @article{thompson_rna_2012, title = {{RNA} {Profiling} and {Chromatin} {Immunoprecipitation}-{Sequencing} {Reveal} that {PTF}1a {Stabilizes} {Pancreas} {Progenitor} {Identity} via the {Control} of {MNX}1/{HLXB}9 and a {Network} of {Other} {Transcription} {Factors}}, volume = {32}, issn = {0270-7306, 1098-5549}, url = {http://mcb.asm.org/content/32/6/1189}, doi = {10.1128/MCB.06318-11}, abstract = {Pancreas development is initiated by the specification and expansion of a small group of endodermal cells. 
Several transcription factors are crucial for progenitor maintenance and expansion, but their interactions and the downstream targets mediating their activity are poorly understood. Among those factors, PTF1a, a basic helix-loop-helix (bHLH) transcription factor which controls pancreas exocrine cell differentiation, maintenance, and functionality, is also needed for the early specification of pancreas progenitors. We used RNA profiling and chromatin immunoprecipitation (ChIP) sequencing to identify a set of targets in pancreas progenitors. We demonstrate that Mnx1, a gene that is absolutely required in pancreas progenitors, is a major direct target of PTF1a and is regulated by a distant enhancer element. Pdx1, Nkx6.1, and Onecut1 are also direct PTF1a targets whose expression is promoted by PTF1a. These proteins, most of which were previously shown to be necessary for pancreas bud maintenance or formation, form a transcription factor network that allows the maintenance of pancreas progenitors. In addition, we identify Bmp7, Nr5a2, RhoV, and P2rx1 as new targets of PTF1a in pancreas progenitors.}, language = {en}, number = {6}, urldate = {2017-02-03}, journal = {Molecular and Cellular Biology}, author = {Thompson, Nancy and Gésina, Emilie and Scheinert, Peter and Bucher, Philipp and Grapin-Botton, Anne}, month = mar, year = {2012}, pmid = {22232429}, pages = {1189--1199}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3AAJMX28/Thompson et al. - 2012 - RNA Profiling and Chromatin Immunoprecipitation-Se.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BCNGS5A6/1189.html:text/html} } @article{isakova_smile-seq_2017-1, title = {{SMiLE}-seq identifies binding motifs of single and dimeric transcription factors}, volume = {advance online publication}, copyright = {© 2017 Nature Publishing Group, a division of Macmillan Publishers Limited. 
All Rights Reserved.}, issn = {1548-7091}, url = {http://www.nature.com/nmeth/journal/vaop/ncurrent/full/nmeth.4143.html}, doi = {10.1038/nmeth.4143}, abstract = {Resolving the DNA-binding specificities of transcription factors (TFs) is of critical value for understanding gene regulation. Here, we present a novel, semiautomated protein–DNA interaction characterization technology, selective microfluidics-based ligand enrichment followed by sequencing (SMiLE-seq). SMiLE-seq is neither limited by DNA bait length nor biased toward strong affinity binders; it probes the DNA-binding properties of TFs over a wide affinity range in a fast and cost-effective fashion. We validated SMiLE-seq by analyzing 58 full-length human, mouse, and Drosophila TFs from distinct structural classes. All tested TFs yielded DNA-binding models with predictive power comparable to or greater than that of other in vitro assays. De novo motif discovery on all JUN–FOS heterodimers and several nuclear receptor-TF complexes provided novel insights into partner-specific heterodimer DNA-binding preferences. We also successfully analyzed the DNA-binding properties of uncharacterized human C2H2 zinc-finger proteins and validated several using ChIP-exo.}, language = {en}, urldate = {2017-02-03}, journal = {Nature Methods}, author = {Isakova, Alina and Groux, Romain and Imbeault, Michael and Rainer, Pernille and Alpern, Daniel and Dainese, Riccardo and Ambrosini, Giovanna and Trono, Didier and Bucher, Philipp and Deplancke, Bart}, month = jan, year = {2017}, keywords = {Genomic analysis, Lab-on-a-chip}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/W72MXR9H/Isakova et al. 
- 2017 - SMiLE-seq identifies binding motifs of single and .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/C7I7TIXW/nmeth.4143.html:text/html} } @article{robertson_genome-wide_2007, title = {Genome-wide profiles of {STAT}1 {DNA} association using chromatin immunoprecipitation and massively parallel sequencing}, volume = {4}, copyright = {© 2007 Nature Publishing Group}, issn = {1548-7091}, url = {http://www.nature.com/nmeth/journal/v4/n8/full/nmeth1068.html}, doi = {10.1038/nmeth1068}, abstract = {We developed a method, ChIP-sequencing (ChIP-seq), combining chromatin immunoprecipitation (ChIP) and massively parallel sequencing to identify mammalian DNA sequences bound by transcription factors in vivo. We used ChIP-seq to map STAT1 targets in interferon-γ (IFN-γ)–stimulated and unstimulated human HeLa S3 cells, and compared the method's performance to ChIP-PCR and to ChIP-chip for four chromosomes. By ChIP-seq, using 15.1 and 12.9 million uniquely mapped sequence reads, and an estimated false discovery rate of less than 0.001, we identified 41,582 and 11,004 putative STAT1-binding regions in stimulated and unstimulated cells, respectively. Of the 34 loci known to contain STAT1 interferon-responsive binding sites, ChIP-seq found 24 (71\%). ChIP-seq targets were enriched in sequences similar to known STAT1 binding motifs. Comparisons with two ChIP-PCR data sets suggested that ChIP-seq sensitivity was between 70\% and 92\% and specificity was at least 95\%.}, language = {en}, number = {8}, urldate = {2017-02-05}, journal = {Nature Methods}, author = {Robertson, Gordon and Hirst, Martin and Bainbridge, Matthew and Bilenky, Misha and Zhao, Yongjun and Zeng, Thomas and Euskirchen, Ghia and Bernier, Bridget and Varhol, Richard and Delaney, Allen and Thiessen, Nina and Griffith, Obi L. 
and He, Ann and Marra, Marco and Snyder, Michael and Jones, Steven}, month = aug, year = {2007}, pages = {651--657}, file = {Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/59396VND/nmeth1068.html:text/html} } @article{schutz_mamot:_2008, title = {{MAMOT}: hidden {Markov} modeling tool}, volume = {24}, issn = {1367-4803}, shorttitle = {{MAMOT}}, url = {https://academic.oup.com/bioinformatics/article/24/11/1399/192734/MAMOT-hidden-Markov-modeling-tool}, doi = {10.1093/bioinformatics/btn201}, number = {11}, urldate = {2017-02-05}, journal = {Bioinformatics}, author = {Schütz, Frédéric and Delorenzi, Mauro}, month = jun, year = {2008}, pages = {1399--1400}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/WMUURNHG/Schütz et Delorenzi - 2008 - MAMOT hidden Markov modeling tool.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HHAXMX5Z/MAMOT-hidden-Markov-modeling-tool.html:text/html} } @article{donohoe_identification_2007, title = {Identification of a {Ctcf} {Cofactor}, {Yy}1, for the {X} {Chromosome} {Binary} {Switch}}, volume = {25}, issn = {1097-2765}, url = {https://www.sciencedirect.com/science/article/pii/S109727650600788X}, doi = {10.1016/j.molcel.2006.11.017}, abstract = {In mammals, inactivation of one X chromosome in the female equalizes gene dosages between XX females and XY males. Two noncoding loci, Tsix and Xite, together regulate X chromosome fate by controlling homologous chromosome pairing, counting, and mutually exclusive choice. Following choice, the asymmetry of Xite and Tsix expression drives divergent chromosome fates, but how this pattern becomes established is currently unknown. Although no proven trans-acting factors have been identified, a likely candidate is Ctcf, a chromatin insulator with essential function in autosomal imprinting. Here, we search for trans-factors and identify Yy1 as a required cofactor for Ctcf. 
Paired Ctcf-Yy1 elements are highly clustered within the counting/choice and imprinting domain of Tsix. A deficiency of Yy1 leads to aberrant Tsix and Xist expression, resulting in a deficit of male and female embryos. Yy1 and Ctcf associate through specific protein-protein interactions and together transactivate Tsix. We propose that the Ctcf-Yy1-Tsix complex functions as a key component of the X chromosome binary switch.}, number = {1}, urldate = {2017-02-07}, journal = {Molecular Cell}, author = {Donohoe, Mary E. and Zhang, Li-Feng and Xu, Na and Shi, Yang and Lee, Jeannie T.}, month = jan, year = {2007}, keywords = {DNA, DEVBIO}, pages = {43--56}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FUWNBNEA/Donohoe et al. - 2007 - Identification of a Ctcf Cofactor, Yy1, for the X .pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SQSMKC36/S109727650600788X.html:text/html} } @article{stedman_cohesins_2008, title = {Cohesins localize with {CTCF} at the {KSHV} latency control region and at cellular c-myc and {H}19 {Igf}2 insulators}, volume = {27}, copyright = {Copyright © 2008 European Molecular Biology Organization}, issn = {0261-4189, 1460-2075}, url = {http://emboj.embopress.org/content/27/4/654}, doi = {10.1038/emboj.2008.1}, abstract = {Cohesins, which mediate sister chromatin cohesion, and CTCF, which functions at chromatin boundaries, play key roles in the structural and functional organization of chromosomes. We examined the binding of these two factors on the Kaposi's sarcoma‐associated herpesvirus (KSHV) episome during latent infection and found a striking colocalization within the control region of the major latency transcript responsible for expressing LANA (ORF73), vCyclin (ORF72), vFLIP (ORF71), and vmiRNAs. Deletion of the CTCF‐binding site from the viral genome disrupted cohesin binding, and crippled colony formation in 293 cells. 
Clonal instability correlated with elevated expression of lytic cycle gene products, notably the neighbouring promoter for K14 and vGPCR (ORF74). siRNA depletion of RAD21 from latently infected cells caused an increase in K14 and ORF74, and lytic inducers caused a rapid dissociation of RAD21 from the viral genome. RAD21 and SMC1 also associate with the cellular CTCF sites at mammalian c‐myc promoter and H19 / Igf2 imprinting control region. We conclude that cohesin subunits associate with viral and cellular CTCF sites involved in complex gene regulation and chromatin organization.}, language = {en}, number = {4}, urldate = {2017-02-07}, journal = {The EMBO Journal}, author = {Stedman, William and Kang, Hyojeung and Lin, Shu and Kissil, Joseph L. and Bartolomei, Marisa S. and Lieberman, Paul M.}, month = feb, year = {2008}, pmid = {18219272}, pages = {654--666}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6JPAWDK8/Stedman et al. - 2008 - Cohesins localize with CTCF at the KSHV latency co.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RXH9XCBG/654.html:text/html} } @article{orenstein_comparative_2014, title = {A comparative analysis of transcription factor binding models learned from {PBM}, {HT}-{SELEX} and {ChIP} data}, volume = {42}, issn = {0305-1048}, url = {https://academic.oup.com/nar/article/42/8/e63/1067315/A-comparative-analysis-of-transcription-factor}, doi = {10.1093/nar/gku117}, number = {8}, urldate = {2017-02-07}, journal = {Nucleic Acids Research}, author = {Orenstein, Yaron and Shamir, Ron}, month = apr, year = {2014}, pages = {e63}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KSRXNAZ6/Orenstein et Shamir - 2014 - A comparative analysis of transcription factor bin.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XAPXSRB5/gku117.html:text/html} } @article{guo_discovering_2010, title = 
{Discovering homotypic binding events at high spatial resolution}, volume = {26}, issn = {1367-4803}, url = {https://academic.oup.com/bioinformatics/article/26/24/3028/289014/Discovering-homotypic-binding-events-at-high}, doi = {10.1093/bioinformatics/btq590}, number = {24}, urldate = {2017-02-13}, journal = {Bioinformatics}, author = {Guo, Yuchun and Papachristoudis, Georgios and Altshuler, Robert C. and Gerber, Georg K. and Jaakkola, Tommi S. and Gifford, David K. and Mahony, Shaun}, month = dec, year = {2010}, pages = {3028--3034}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/NHTWZW37/Guo et al. - 2010 - Discovering homotypic binding events at high spati.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/G4B37CI9/btq590.html:text/html} } @article{roulet_high-throughput_2002, title = {High-throughput {SELEX}–{SAGE} method for quantitative modeling of transcription-factor binding sites}, volume = {20}, copyright = {© 2002 Nature Publishing Group}, issn = {1087-0156}, url = {http://www.nature.com/nbt/journal/v20/n8/full/nbt718.html}, doi = {10.1038/nbt718}, abstract = {The ability to determine the location and relative strength of all transcription-factor binding sites in a genome is important both for a comprehensive understanding of gene regulation and for effective promoter engineering in biotechnological applications. Here we present a bioinformatically driven experimental method to accurately define the DNA-binding sequence specificity of transcription factors. A generalized profile was used as a predictive quantitative model for binding sites, and its parameters were estimated from in vitro–selected ligands using standard hidden Markov model training algorithms. Computer simulations showed that several thousand low- to medium-affinity sequences are required to generate a profile of desired accuracy. 
To produce data on this scale, we applied high-throughput genomics methods to the biochemical problem addressed here. A method combining systematic evolution of ligands by exponential enrichment (SELEX) and serial analysis of gene expression (SAGE) protocols was coupled to an automated quality-controlled sequence extraction procedure based on Phred quality scores. This allowed the sequencing of a database of more than 10,000 potential DNA ligands for the CTF/NFI transcription factor. The resulting binding-site model defines the sequence specificity of this protein with a high degree of accuracy not achieved earlier and thereby makes it possible to identify previously unknown regulatory sequences in genomic DNA. A covariance analysis of the selected sites revealed non-independent base preferences at different nucleotide positions, providing insight into the binding mechanism.}, language = {en}, number = {8}, urldate = {2017-02-19}, journal = {Nature Biotechnology}, author = {Roulet, Emmanuelle and Busso, Stéphane and Camargo, Anamaria A. and Simpson, Andrew J. G. and Mermod, Nicolas and Bucher, Philipp}, month = aug, year = {2002}, pages = {831--835}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7G98EPPD/Roulet et al. - 2002 - High-throughput SELEX–SAGE method for quantitative.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GNXT26I9/nbt718.html:text/html} } @article{ghirlando_ctcf:_2016, title = {{CTCF}: making the right connections}, volume = {30}, issn = {0890-9369, 1549-5477}, shorttitle = {{CTCF}}, url = {http://genesdev.cshlp.org/content/30/8/881}, doi = {10.1101/gad.277863.116}, abstract = {The role of the zinc finger protein CTCF in organizing the genome within the nucleus is now well established. Widely separated sites on DNA, occupied by both CTCF and the cohesin complex, make physical contacts that create large loop domains. 
Additional contacts between loci within those domains, often also mediated by CTCF, tend to be favored over contacts between loci in different domains. A large number of studies during the past 2 years have addressed the questions: How are these loops generated? What are the effects of disrupting them? Are there rules governing large-scale genome organization? It now appears that the strongest and evolutionarily most conserved of these CTCF interactions have specific rules for the orientation of the paired CTCF sites, implying the existence of a nonequilibrium mechanism of generation. Recent experiments that invert, delete, or inactivate one of a mating CTCF pair result in major changes in patterns of organization and gene expression in the surrounding regions. What remain to be determined are the detailed molecular mechanisms for re-establishing loop domains and maintaining them after replication and mitosis. As recently published data show, some mechanisms may involve interactions with noncoding RNAs as well as protein cofactors. 
Many CTCF sites are also involved in other functions such as modulation of RNA splicing and specific regulation of gene expression, and the relationship between these activities and loop formation is another unanswered question that should keep investigators occupied for some time.}, language = {en}, number = {8}, urldate = {2017-02-23}, journal = {Genes \& Development}, author = {Ghirlando, Rodolfo and Felsenfeld, Gary}, month = apr, year = {2016}, pmid = {27083996}, keywords = {Chromatin, Insulators, topologically associated domains}, pages = {881--891}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ASNKGH65/Ghirlando et Felsenfeld - 2016 - CTCF making the right connections.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4NW9GNZ5/881.html:text/html} } @article{van_helden_confidence_2016, title = {Confidence intervals are no salvation from the alleged fickleness of the {P} value}, volume = {13}, copyright = {© 2016 Nature Publishing Group, a division of Macmillan Publishers Limited. 
All Rights Reserved.}, issn = {1548-7091}, url = {http://www.nature.com/nmeth/journal/v13/n8/full/nmeth.3932.html}, doi = {10.1038/nmeth.3932}, language = {en}, number = {8}, urldate = {2017-02-27}, journal = {Nature Methods}, author = {van Helden, Jacques}, month = aug, year = {2016}, keywords = {Scientific community, Standards, Statistical methods}, pages = {605--606}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9K4SNU5Q/van Helden - 2016 - Confidence intervals are no salvation from the all.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Z3KUI6VQ/nmeth.3932.html:text/html} } @article{thomas-chollier_rsat:_2008, title = {{RSAT}: regulatory sequence analysis tools}, volume = {36}, issn = {0305-1048}, shorttitle = {{RSAT}}, url = {https://academic.oup.com/nar/article/36/suppl_2/W119/2506687/RSAT-regulatory-sequence-analysis-tools}, doi = {10.1093/nar/gkn304}, number = {suppl\_2}, urldate = {2017-02-27}, journal = {Nucleic Acids Research}, author = {Thomas-Chollier, Morgane and Sand, Olivier and Turatsinze, Jean-Valéry and Janky, Rekin's and Defrance, Matthieu and Vervisch, Eric and Brohée, Sylvain and van Helden, Jacques}, month = jul, year = {2008}, pages = {W119--W127}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/V565AVBF/Thomas-Chollier et al. 
- 2008 - RSAT regulatory sequence analysis tools.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GZBFC8BJ/RSAT-regulatory-sequence-analysis-tools.html:text/html} } @article{wilbanks_evaluation_2010-1, title = {Evaluation of {Algorithm} {Performance} in {ChIP}-{Seq} {Peak} {Detection}}, volume = {5}, issn = {1932-6203}, url = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0011471}, doi = {10.1371/journal.pone.0011471}, abstract = {Next-generation DNA sequencing coupled with chromatin immunoprecipitation (ChIP-seq) is revolutionizing our ability to interrogate whole genome protein-DNA interactions. Identification of protein binding sites from ChIP-seq data has required novel computational tools, distinct from those used for the analysis of ChIP-Chip experiments. The growing popularity of ChIP-seq spurred the development of many different analytical programs (at last count, we noted 31 open source methods), each with some purported advantage. Given that the literature is dense and empirical benchmarking challenging, selecting an appropriate method for ChIP-seq analysis has become a daunting task. Herein we compare the performance of eleven different peak calling programs on common empirical, transcription factor datasets and measure their sensitivity, accuracy and usability. Our analysis provides an unbiased critical assessment of available technologies, and should assist researchers in choosing a suitable tool for handling ChIP-seq data.}, number = {7}, urldate = {2017-03-15}, journal = {PLOS ONE}, author = {Wilbanks, Elizabeth G. 
and Facciotti, Marc T.}, year = {2010}, keywords = {Transcription Factors, Sequence motif analysis, Algorithms, DNA-binding proteins, Computer software, Genomic databases, Sequence tagged site analysis, Human genomics}, pages = {e11471}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/J5VJPGWV/Wilbanks et Facciotti - 2010 - Evaluation of Algorithm Performance in ChIP-Seq Pe.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HWSDRPH6/article.html:text/html} } @article{nozaki_tight_2011-1, title = {Tight associations between transcription promoter type and epigenetic variation in histone positioning and modification}, volume = {12}, issn = {1471-2164}, url = {http://dx.doi.org/10.1186/1471-2164-12-416}, doi = {10.1186/1471-2164-12-416}, abstract = {Transcription promoters are fundamental genomic cis-elements controlling gene expression. They can be classified into two types by the degree of imprecision of their transcription start sites: peak promoters, which initiate transcription from a narrow genomic region; and broad promoters, which initiate transcription from a wide-ranging region. Eukaryotic transcription initiation is suggested to be associated with the genomic positions and modifications of nucleosomes. For instance, it has been recently shown that histone with H3K9 acetylation (H3K9ac) is more likely to be distributed around broad promoters rather than peak promoters; it can thus be inferred that there is an association between histone H3K9 and promoter architecture.}, urldate = {2017-03-17}, journal = {BMC Genomics}, author = {Nozaki, Tadasu and Yachie, Nozomu and Ogawa, Ryu and Kratz, Anton and Saito, Rintaro and Tomita, Masaru}, year = {2011}, pages = {416}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/VIU8E9VW/Nozaki et al. 
- 2011 - Tight associations between transcription promoter .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HTUH5TZD/1471-2164-12-416.html:text/html} } @article{the_fantom_consortium_and_the_riken_pmi_and_clst_dgt_promoter-level_2014, title = {A promoter-level mammalian expression atlas}, volume = {507}, copyright = {© 2014 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {0028-0836}, url = {http://www.nature.com/nature/journal/v507/n7493/full/nature13182.html}, doi = {10.1038/nature13182}, abstract = {Regulated transcription controls the diversity, developmental pathways and spatial organization of the hundreds of cell types that make up a mammal. Using single-molecule cDNA sequencing, we mapped transcription start sites (TSSs) and their usage in human and mouse primary cells, cell lines and tissues to produce a comprehensive overview of mammalian gene expression across the human body. We find that few genes are truly ‘housekeeping’, whereas many mammalian promoters are composite entities composed of several closely separated TSSs, with independent cell-type-specific expression profiles. TSSs specific to different cell types evolve at different rates, whereas promoters of broadly expressed genes are the most conserved. Promoter-based expression analysis reveals key transcription factors defining cell states and links them to binding-site motifs. The functions of identified novel transcripts can be predicted by coexpression and sample ontology enrichment analyses. 
The functional annotation of the mammalian genome 5 (FANTOM5) project provides comprehensive expression profiles and functional annotation of mammalian cell-type-specific transcriptomes with wide applications in biomedical research.}, language = {en}, number = {7493}, urldate = {2017-03-17}, journal = {Nature}, author = {{The FANTOM Consortium and the RIKEN PMI and CLST (dgt)}}, month = mar, year = {2014}, keywords = {Gene regulation, transcriptomics, Gene regulatory networks, Multicellular systems}, pages = {462--470}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XRAKAQX9/The FANTOM Consortium and the RIKEN PMI and CLST (dgt) - 2014 - A promoter-level mammalian expression atlas.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QJQ6K929/nature13182.html:text/html} } @article{spivakov_analysis_2012, title = {Analysis of variation at transcription factor binding sites in {Drosophila} and humans}, volume = {13}, issn = {1474-760X}, url = {http://dx.doi.org/10.1186/gb-2012-13-9-r49}, doi = {10.1186/gb-2012-13-9-r49}, abstract = {Advances in sequencing technology have boosted population genomics and made it possible to map the positions of transcription factor binding sites (TFBSs) with high precision. Here we investigate TFBS variability by combining transcription factor binding maps generated by ENCODE, modENCODE, our previously published data and other sources with genomic variation data for human individuals and Drosophila isogenic lines.}, urldate = {2017-03-24}, journal = {Genome Biology}, author = {Spivakov, Mikhail and Akhtar, Junaid and Kheradpour, Pouya and Beal, Kathryn and Girardot, Charles and Koscielny, Gautier and Herrero, Javier and Kellis, Manolis and Furlong, Eileen EM and Birney, Ewan}, year = {2012}, pages = {R49}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/S2XFQGBR/Spivakov et al. 
- 2012 - Analysis of variation at transcription factor bind.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/K43SM4MK/gb-2012-13-9-r49.html:text/html} } @article{landt_chip-seq_2012-1, title = {{ChIP}-seq guidelines and practices of the {ENCODE} and {modENCODE} consortia}, volume = {22}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/22/9/1813}, doi = {10.1101/gr.136184.111}, abstract = {Chromatin immunoprecipitation (ChIP) followed by high-throughput DNA sequencing (ChIP-seq) has become a valuable and widely used approach for mapping the genomic location of transcription-factor binding and histone modifications in living cells. Despite its widespread use, there are considerable differences in how these experiments are conducted, how the results are scored and evaluated for quality, and how the data and metadata are archived for public use. These practices affect the quality and utility of any global ChIP experiment. Through our experience in performing ChIP-seq experiments, the ENCODE and modENCODE consortia have developed a set of working standards and guidelines for ChIP experiments that are updated routinely. The current guidelines address antibody validation, experimental replication, sequencing depth, data and metadata reporting, and data quality assessment. We discuss how ChIP quality, assessed in these ways, affects different uses of ChIP-seq data. All data sets used in the analysis have been deposited for public viewing and downloading at the ENCODE (http://encodeproject.org/ENCODE/) and modENCODE (http://www.modencode.org/) portals.}, language = {en}, number = {9}, urldate = {2017-03-24}, journal = {Genome Research}, author = {Landt, Stephen G. and Marinov, Georgi K. and Kundaje, Anshul and Kheradpour, Pouya and Pauli, Florencia and Batzoglou, Serafim and Bernstein, Bradley E. and Bickel, Peter and Brown, James B. 
and Cayting, Philip and Chen, Yiwen and DeSalvo, Gilberto and Epstein, Charles and Fisher-Aylor, Katherine I. and Euskirchen, Ghia and Gerstein, Mark and Gertz, Jason and Hartemink, Alexander J. and Hoffman, Michael M. and Iyer, Vishwanath R. and Jung, Youngsook L. and Karmakar, Subhradip and Kellis, Manolis and Kharchenko, Peter V. and Li, Qunhua and Liu, Tao and Liu, X. Shirley and Ma, Lijia and Milosavljevic, Aleksandar and Myers, Richard M. and Park, Peter J. and Pazin, Michael J. and Perry, Marc D. and Raha, Debasish and Reddy, Timothy E. and Rozowsky, Joel and Shoresh, Noam and Sidow, Arend and Slattery, Matthew and Stamatoyannopoulos, John A. and Tolstorukov, Michael Y. and White, Kevin P. and Xi, Simon and Farnham, Peggy J. and Lieb, Jason D. and Wold, Barbara J. and Snyder, Michael}, month = sep, year = {2012}, pmid = {22955991}, pages = {1813--1831}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/N7CG2BSN/Landt et al. - 2012 - ChIP-seq guidelines and practices of the ENCODE an.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/REBXQDRA/1813.html:text/html} } @article{lai_archalign:_2010, title = {{ArchAlign}: coordinate-free chromatin alignment reveals novel architectures}, volume = {11}, issn = {1474-760X}, shorttitle = {{ArchAlign}}, url = {http://dx.doi.org/10.1186/gb-2010-11-12-r126}, doi = {10.1186/gb-2010-11-12-r126}, abstract = {To facilitate identification and characterization of genomic functional elements, we have developed a chromatin architecture alignment algorithm (ArchAlign). ArchAlign identifies shared chromatin structural patterns from high-resolution chromatin structural datasets derived from next-generation sequencing or tiled microarray approaches for user defined regions of interest. We validated ArchAlign using well characterized functional elements, and used it to explore the chromatin structural architecture at CTCF binding sites in the human genome. 
ArchAlign is freely available at http://www.acsu.buffalo.edu/{\textasciitilde}mjbuck/ArchAlign.html .}, urldate = {2017-04-13}, journal = {Genome Biology}, author = {Lai, William KM and Buck, Michael J.}, year = {2010}, pages = {R126}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ARFHNX3Q/Lai et Buck - 2010 - ArchAlign coordinate-free chromatin alignment rev.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/J7EV6WKM/gb-2010-11-12-r126.html:text/html} } @article{deplancke_genetics_2016, title = {The {Genetics} of {Transcription} {Factor} {DNA} {Binding} {Variation}}, volume = {166}, issn = {0092-8674, 1097-4172}, url = {http://www.cell.com/cell/abstract/S0092-8674(16)30918-7}, doi = {10.1016/j.cell.2016.07.012}, abstract = {Most complex trait-associated variants are located in non-coding regulatory regions of the genome, where they have been shown to disrupt transcription factor (TF)-DNA binding motifs. Variable TF-DNA interactions are therefore increasingly considered as key drivers of phenotypic variation. However, recent genome-wide studies revealed that the majority of variable TF-DNA binding events are not driven by sequence alterations in the motif of the studied TF. This observation implies that the molecular mechanisms underlying TF-DNA binding variation and, by extrapolation, inter-individual phenotypic variation are more complex than originally anticipated. Here, we summarize the findings that led to this important paradigm shift and review proposed mechanisms for local, proximal, or distal genetic variation-driven variable TF-DNA binding. 
In addition, we discuss the biomedical implications of these findings for our ability to dissect the molecular role(s) of non-coding genetic variants in complex traits, including disease susceptibility.}, language = {English}, number = {3}, urldate = {2017-06-22}, journal = {Cell}, author = {Deplancke, Bart and Alpern, Daniel and Gardeux, Vincent}, month = jul, year = {2016}, pmid = {27471964}, pages = {538--554}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/T7E8QFWA/Deplancke et al. - 2016 - The Genetics of Transcription Factor DNA Binding V.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/V7R8ZBU4/S0092-8674(16)30918-7.html:text/html} } @article{birney_allele-specific_2010, title = {Allele-specific and heritable chromatin signatures in humans}, volume = {19}, issn = {0964-6906}, url = {https://academic.oup.com/hmg/article/19/R2/R204/640861/Allele-specific-and-heritable-chromatin-signatures}, doi = {10.1093/hmg/ddq404}, number = {R2}, urldate = {2017-07-12}, journal = {Human Molecular Genetics}, author = {Birney, Ewan and Lieb, Jason D. and Furey, Terrence S. and Crawford, Gregory E. and Iyer, Vishwanath R.}, month = oct, year = {2010}, pages = {R204--R209}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/M7K7MBWQ/Birney et al. 
- 2010 - Allele-specific and heritable chromatin signatures.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8ED43FEZ/ddq404.html:text/html} } @article{nielsen_catchprofiles:_2012, title = {{CATCHprofiles}: {Clustering} and {Alignment} {Tool} for {ChIP} {Profiles}}, volume = {7}, issn = {1932-6203}, shorttitle = {{CATCHprofiles}}, url = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0028272}, doi = {10.1371/journal.pone.0028272}, abstract = {Chromatin Immuno Precipitation (ChIP) profiling detects in vivo protein-DNA binding, and has revealed a large combinatorial complexity in the binding of chromatin associated proteins and their post-translational modifications. To fully explore the spatial and combinatorial patterns in ChIP-profiling data and detect potentially meaningful patterns, the areas of enrichment must be aligned and clustered, which is an algorithmically and computationally challenging task. We have developed CATCHprofiles, a novel tool for exhaustive pattern detection in ChIP profiling data. CATCHprofiles is built upon a computationally efficient implementation for the exhaustive alignment and hierarchical clustering of ChIP profiling data. The tool features a graphical interface for examination and browsing of the clustering results. CATCHprofiles requires no prior knowledge about functional sites, detects known binding patterns “ab initio”, and enables the detection of new patterns from ChIP data at a high resolution, exemplified by the detection of asymmetric histone and histone modification patterns around H2A.Z-enriched sites. CATCHprofiles' capability for exhaustive analysis combined with its ease-of-use makes it an invaluable tool for explorative research based on ChIP profiling data. CATCHprofiles and the CATCH algorithm run on all platforms and is available for free through the CATCH website: http://catch.cmbi.ru.nl/. 
User support is available by subscribing to the mailing list catch-users@bioinformatics.org.}, number = {1}, urldate = {2017-07-20}, journal = {PLOS ONE}, author = {Nielsen, Fiona G. G. and Markus, Kasper Galschiøt and Friborg, Rune Møllegaard and Favrholdt, Lene Monrad and Stunnenberg, Hendrik G. and Huynen, Martijn}, month = jan, year = {2012}, keywords = {Sequence alignment, Algorithms, DNA-binding proteins, Nucleosomes, Histones, Genome analysis, Histone modification, Genomic signal processing}, pages = {e28272}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/C5N4PK8T/Nielsen et al. - 2012 - CATCHprofiles Clustering and Alignment Tool for C.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UKZNEK7P/article.html:text/html} } @article{dalton_clustering_2009, title = {Clustering {Algorithms}: {On} {Learning}, {Validation}, {Performance}, and {Applications} to {Genomics}}, volume = {10}, issn = {1389-2029}, shorttitle = {Clustering {Algorithms}}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2766793/}, doi = {10.2174/138920209789177601}, abstract = {The development of microarray technology has enabled scientists to measure the expression of thousands of genes simultaneously, resulting in a surge of interest in several disciplines throughout biology and medicine. While data clustering has been used for decades in image processing and pattern recognition, in recent years it has joined this wave of activity as a popular technique to analyze microarrays. To illustrate its application to genomics, clustering applied to genes from a set of microarray data groups together those genes whose expression levels exhibit similar behavior throughout the samples, and when applied to samples it offers the potential to discriminate pathologies based on their differential patterns of gene expression. 
Although clustering has now been used for many years in the context of gene expression microarrays, it has remained highly problematic. The choice of a clustering algorithm and validation index is not a trivial one, more so when applying them to high throughput biological or medical data. Factors to consider when choosing an algorithm include the nature of the application, the characteristics of the objects to be analyzed, the expected number and shape of the clusters, and the complexity of the problem versus computational power available. In some cases a very simple algorithm may be appropriate to tackle a problem, but many situations may require a more complex and powerful algorithm better suited for the job at hand. In this paper, we will cover the theoretical aspects of clustering, including error and learning, followed by an overview of popular clustering algorithms and classical validation indices. We also discuss the relative performance of these algorithms and indices and conclude with examples of the application of clustering to computational biology.}, number = {6}, journal = {Current Genomics}, author = {Dalton, Lori and Ballarin, Virginia and Brun, Marcel}, month = sep, year = {2009}, pmid = {20190957}, pmcid = {PMC2766793}, pages = {430--445}, file = {PubMed Central Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9CNCMQR7/Dalton et al. - 2009 - Clustering Algorithms On Learning, Validation, Pe.pdf:application/pdf} } @article{rodriguez_clustering_2014, title = {Clustering by fast search and find of density peaks}, volume = {344}, copyright = {Copyright © 2014, American Association for the Advancement of Science}, issn = {0036-8075, 1095-9203}, url = {http://science.sciencemag.org/content/344/6191/1492}, doi = {10.1126/science.1242072}, abstract = {Discerning clusters of data points Cluster analysis is used in many disciplines to group objects according to a defined measure of distance. 
Numerous algorithms exist, some based on the analysis of the local density of data points, and others on predefined probability distributions. Rodriguez and Laio devised a method in which the cluster centers are recognized as local density maxima that are far away from any points of higher density. The algorithm depends only on the relative densities rather than their absolute values. The authors tested the method on a series of data sets, and its performance compared favorably to that of established techniques. Science, this issue p. 1492 Cluster analysis is aimed at classifying elements into categories on the basis of their similarity. Its applications range from astronomy to bioinformatics, bibliometrics, and pattern recognition. We propose an approach based on the idea that cluster centers are characterized by a higher density than their neighbors and by a relatively large distance from points with higher densities. This idea forms the basis of a clustering procedure in which the number of clusters arises intuitively, outliers are automatically spotted and excluded from the analysis, and clusters are recognized regardless of their shape and of the dimensionality of the space in which they are embedded. We demonstrate the power of the algorithm on several test cases. Local density of points is ranked and analyzed to categorize data. 
}, language = {en}, number = {6191}, urldate = {2017-08-07}, journal = {Science}, author = {Rodriguez, Alex and Laio, Alessandro}, month = jun, year = {2014}, pmid = {24970081}, pages = {1492--1496}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/A8TS2M3Z/Rodriguez et Laio - 2014 - Clustering by fast search and find of density peak.pdf:application/pdf;Rodriguez.SM.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/A8TS2M3Z/Rodriguez.SM.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BFPAPNRQ/1492.html:text/html} } @article{grant_fimo:_2011, title = {{FIMO}: scanning for occurrences of a given motif}, volume = {27}, issn = {1367-4803}, shorttitle = {{FIMO}}, url = {https://academic.oup.com/bioinformatics/article/27/7/1017/232614/FIMO-scanning-for-occurrences-of-a-given-motif}, doi = {10.1093/bioinformatics/btr064}, abstract = {Summary: A motif is a short DNA or protein sequence that contributes to the biological function of the sequence in which it resides. Over the past several decades, many computational methods have been described for identifying, characterizing and searching with sequence motifs. Critical to nearly any motif-based sequence analysis pipeline is the ability to scan a sequence database for occurrences of a given motif described by a position-specific frequency matrix. Results: We describe Find Individual Motif Occurrences (FIMO), a software tool for scanning DNA or protein sequences with motifs described as position-specific scoring matrices. The program computes a log-likelihood ratio score for each position in a given sequence database, uses established dynamic programming methods to convert this score to a P-value and then applies false discovery rate analysis to estimate a q-value for each position in the given sequence.
FIMO provides output in a variety of formats, including HTML, XML and several Santa Cruz Genome Browser formats. The program is efficient, allowing for the scanning of DNA sequences at a rate of 3.5 Mb/s on a single CPU. Availability and Implementation: FIMO is part of the MEME Suite software toolkit. A web server and source code are available at http://meme.sdsc.edu. Contact: t.bailey@imb.uq.edu.au. Supplementary information: Supplementary data are available at Bioinformatics online.}, number = {7}, urldate = {2017-08-30}, journal = {Bioinformatics}, author = {Grant, Charles E. and Bailey, Timothy L. and Noble, William Stafford}, month = apr, year = {2011}, pages = {1017--1018}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/EJG8BJ8F/Grant et al. - 2011 - FIMO scanning for occurrences of a given motif.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/DIAU79C4/btr064.html:text/html} } @article{beckstette_fast_2006, title = {Fast index based algorithms and software for matching position specific scoring matrices}, volume = {7}, issn = {1471-2105}, url = {https://doi.org/10.1186/1471-2105-7-389}, doi = {10.1186/1471-2105-7-389}, abstract = {In biological sequence analysis, position specific scoring matrices (PSSMs) are widely used to represent sequence motifs in nucleotide as well as amino acid sequences. Searching with PSSMs in complete genomes or large sequence databases is a common, but computationally expensive task.}, journal = {BMC Bioinformatics}, author = {Beckstette, Michael and Homann, Robert and Giegerich, Robert and Kurtz, Stefan}, month = aug, year = {2006}, pages = {389}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/DVPWJ2C6/Beckstette et al. 
- 2006 - Fast index based algorithms and software for match.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/REDQ96S6/1471-2105-7-389.html:text/html} } @article{raykov_what_2016, title = {What to {Do} {When} {K}-{Means} {Clustering} {Fails}: {A} {Simple} yet {Principled} {Alternative} {Algorithm}}, volume = {11}, issn = {1932-6203}, shorttitle = {What to {Do} {When} {K}-{Means} {Clustering} {Fails}}, url = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0162259}, doi = {10.1371/journal.pone.0162259}, abstract = {The K-means algorithm is one of the most popular clustering algorithms in current use as it is relatively fast yet simple to understand and deploy in practice. Nevertheless, its use entails certain restrictive assumptions about the data, the negative consequences of which are not always immediately apparent, as we demonstrate. While more flexible algorithms have been developed, their widespread use has been hindered by their computational and technical complexity. Motivated by these considerations, we present a flexible alternative to K-means that relaxes most of the assumptions, whilst remaining almost as fast and simple. This novel algorithm which we call MAP-DP (maximum a-posteriori Dirichlet process mixtures), is statistically rigorous as it is based on nonparametric Bayesian Dirichlet process mixture modeling. This approach allows us to overcome most of the limitations imposed by K-means. The number of clusters K is estimated from the data instead of being fixed a-priori as in K-means. In addition, while K-means is restricted to continuous data, the MAP-DP framework can be applied to many kinds of data, for example, binary, count or ordinal data. Also, it can efficiently separate outliers from the data. This additional flexibility does not incur a significant computational overhead compared to K-means with MAP-DP convergence typically achieved in the order of seconds for many practical problems. 
Finally, in contrast to K-means, since the algorithm is based on an underlying statistical model, the MAP-DP framework can deal with missing data and enables model testing such as cross validation in a principled way. We demonstrate the simplicity and effectiveness of this algorithm on the health informatics problem of clinical sub-typing in a cluster of diseases known as parkinsonism.}, number = {9}, urldate = {2017-10-20}, journal = {PLOS ONE}, author = {Raykov, Yordan P. and Boukouvalas, Alexis and Baig, Fahd and Little, Max A.}, month = sep, year = {2016}, keywords = {Algorithms, Parkinson disease, Radii, Clustering algorithms, Myoclonus, Eyes, Permutation, Random variables}, pages = {e0162259}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SD4VR3ZM/Raykov et al. - 2016 - What to Do When K-Means Clustering Fails A Simple.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SCJRU8B8/article.html:text/html} } @article{porro_functional_2014, title = {Functional characterization of the {TERRA} transcriptome at damaged telomeres}, volume = {5}, copyright = {2014 Nature Publishing Group}, issn = {2041-1723}, url = {https://www.nature.com/articles/ncomms6379}, doi = {10.1038/ncomms6379}, abstract = {Telomere uncapping in senescent cells is accompanied by loss of the TRF2 telomere capping factor and upregulation of the long noncoding RNA TERRA. 
Here the authors characterize the TERRA transcriptome and show that TERRA upregulation may promote SUV39H1 recruitment, H3K9 trimethylation and telomere end-to-end fusions.}, language = {en}, urldate = {2017-11-21}, journal = {Nature Communications}, author = {Porro, Antonio and Feuerhahn, Sascha and Delafontaine, Julien and Riethman, Harold and Rougemont, Jacques and Lingner, Joachim}, month = oct, year = {2014}, pages = {ncomms6379}, file = {Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/C3H58PDX/ncomms6379.html:text/html} } @article{das_survey_2007, title = {A survey of {DNA} motif finding algorithms}, volume = {8}, issn = {1471-2105}, url = {https://doi.org/10.1186/1471-2105-8-S7-S21}, doi = {10.1186/1471-2105-8-S7-S21}, abstract = {Unraveling the mechanisms that regulate gene expression is a major challenge in biology. An important task in this challenge is to identify regulatory elements, especially the binding sites in deoxyribonucleic acid (DNA) for transcription factors. These binding sites are short DNA segments that are called motifs. Recent advances in genome sequence availability and in high-throughput gene expression analysis technologies have allowed for the development of computational methods for motif finding. As a result, a large number of motif finding algorithms have been implemented and applied to various motif models over the past decade. This survey reviews the latest developments in DNA motif finding algorithms.}, number = {7}, journal = {BMC Bioinformatics}, author = {Das, Modan K. 
and Dai, Ho-Kwok}, month = nov, year = {2007}, pages = {S21}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TENU8HJ8/Das et Dai - 2007 - A survey of DNA motif finding algorithms.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FSHUHRF4/1471-2105-8-S7-S21.html:text/html} } @article{haberle_promoter_2016, series = {Transcriptional {Enhancers}}, title = {Promoter architectures and developmental gene regulation}, volume = {57}, issn = {1084-9521}, url = {http://www.sciencedirect.com/science/article/pii/S1084952116300143}, doi = {10.1016/j.semcdb.2016.01.014}, abstract = {Core promoters are minimal regions sufficient to direct accurate initiation of transcription and are crucial for regulation of gene expression. They are highly diverse in terms of associated core promoter motifs, underlying sequence composition and patterns of transcription initiation. Distinctive features of promoters are also seen at the chromatin level, including nucleosome positioning patterns and presence of specific histone modifications. Recent advances in identifying and characterizing promoters using next-generation sequencing-based technologies have provided the basis for their classification into functional groups and have shed light on their modes of regulation, with important implications for transcriptional regulation in development. 
This review discusses the methodology and the results of genome-wide studies that provided insight into the diversity of RNA polymerase II promoter architectures in vertebrates and other Metazoa, and the association of these architectures with distinct modes of regulation in embryonic development and differentiation.}, journal = {Seminars in Cell \& Developmental Biology}, author = {Haberle, Vanja and Lenhard, Boris}, month = sep, year = {2016}, keywords = {Transcriptional regulation, Core promoter, Transcription start sites, CAGE, Promoter types, Overlapping codes}, pages = {11--23}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/U9NFEUEZ/Haberle et Lenhard - 2016 - Promoter architectures and developmental gene regu.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/H7H6IJF4/S1084952116300143.html:text/html} } @article{lenhard_metazoan_2012, title = {Metazoan promoters: emerging characteristics and insights into transcriptional regulation}, volume = {13}, copyright = {2012 Nature Publishing Group}, issn = {1471-0064}, shorttitle = {Metazoan promoters}, url = {https://www.nature.com/articles/nrg3163}, doi = {10.1038/nrg3163}, abstract = {Promoters are crucial for gene regulation. They vary greatly in terms of associated regulatory elements, sequence motifs, the choice of transcription start sites and other features. Several technologies that harness next-generation sequencing have enabled recent advances in identifying promoters and their features, helping researchers who are investigating functional categories of promoters and their modes of regulation. Additional features of promoters that are being characterized include types of histone modifications, nucleosome positioning, RNA polymerase pausing and novel small RNAs. 
In this Review, we discuss recent findings relating to metazoan promoters and how these findings are leading to a revised picture of what a gene promoter is and how it works.}, language = {en}, number = {4}, urldate = {2018-02-20}, journal = {Nature Reviews Genetics}, author = {Lenhard, Boris and Sandelin, Albin and Carninci, Piero}, month = apr, year = {2012}, pages = {233--245}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3A4V79DS/Lenhard et al. - 2012 - Metazoan promoters emerging characteristics and i.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/WQFCRVTM/nrg3163.html:text/html} } @article{yang_prevalence_2007, title = {Prevalence of the initiator over the {TATA} box in human and yeast genes and identification of {DNA} motifs enriched in human {TATA}-less core promoters}, volume = {389}, issn = {0378-1119}, url = {http://www.sciencedirect.com/science/article/pii/S0378111906006238}, doi = {10.1016/j.gene.2006.09.029}, abstract = {The core promoter of eukaryotic genes is the minimal DNA region that recruits the basal transcription machinery to direct efficient and accurate transcription initiation. The fraction of human and yeast genes that contain specific core promoter elements such as the TATA box and the initiator (INR) remains unclear and core promoter motifs specific for TATA-less genes remain to be identified. Here, we present genome-scale computational analyses indicating that ∼76\% of human core promoters lack TATA-like elements, have a high GC content, and are enriched in Sp1-binding sites. We further identify two motifs – M3 (SCGGAAGY) and M22 (TGCGCANK) – that occur preferentially in human TATA-less core promoters. About 24\% of human genes have a TATA-like element and their promoters are generally AT-rich; however, only ∼10\% of these TATA-containing promoters have the canonical TATA box (TATAWAWR). 
In contrast, ∼46\% of human core promoters contain the consensus INR (YYANWYY) and ∼30\% are INR-containing TATA-less genes. Significantly, ∼46\% of human promoters lack both TATA-like and consensus INR elements. Surprisingly, mammalian-type INR sequences are present – and tend to cluster – in the transcription start site (TSS) region of ∼40\% of yeast core promoters and the frequency of specific core promoter types appears to be conserved in yeast and human genomes. Gene Ontology analyses reveal that TATA-less genes in humans, as in yeast, are frequently involved in basic “housekeeping” processes, while TATA-containing genes are more often highly regulated, such as by biotic or stress stimuli. These results reveal unexpected similarities in the occurrence of specific core promoter types and in their associated biological processes in yeast and humans and point to novel vertebrate-specific DNA motifs that might play a selective role in TATA-independent transcription.}, number = {1}, journal = {Gene}, author = {Yang, Chuhu and Bolotin, Eugene and Jiang, Tao and Sladek, Frances M. and Martinez, Ernest}, month = mar, year = {2007}, keywords = {transcription, Genome-wide computational analysis, Core promoter elements, Sp1, ELK-1, M22, Motif distribution}, pages = {52--65}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9PU5KSBT/Yang et al. 
- 2007 - Prevalence of the initiator over the TATA box in h.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8PZSB8IF/S0378111906006238.html:text/html} } @article{ye_seqminer:_2011, title = {{seqMINER}: an integrated {ChIP}-seq data interpretation platform}, volume = {39}, issn = {0305-1048}, shorttitle = {{seqMINER}}, url = {https://academic.oup.com/nar/article/39/6/e35/2411697}, doi = {10.1093/nar/gkq1287}, abstract = {In a single experiment, chromatin immunoprecipitation combined with high throughput sequencing (ChIP-seq) provides genome-wide information about a given covalent histone modification or transcription factor occupancy. However, time efficient bioinformatics resources for extracting biological meaning out of these gigabyte-scale datasets are often a limiting factor for data interpretation by biologists. We created an integrated portable ChIP-seq data interpretation platform called seqMINER, with optimized performances for efficient handling of multiple genome-wide datasets. seqMINER allows comparison and integration of multiple ChIP-seq datasets and extraction of qualitative as well as quantitative information. seqMINER can handle the biological complexity of most experimental situations and proposes methods to the user for data classification according to the analysed features. In addition, through multiple graphical representations, seqMINER allows visualization and modelling of general as well as specific patterns in a given dataset. To demonstrate the efficiency of seqMINER, we have carried out a comprehensive analysis of genome-wide chromatin modification data in mouse embryonic stem cells to understand the global epigenetic landscape and its change through cellular differentiation.}, language = {en}, number = {6}, urldate = {2018-02-21}, journal = {Nucleic Acids Research}, author = {Ye, Tao and Krebs, Arnaud R. 
and Choukrallah, Mohamed-Amin and Keime, Celine and Plewniak, Frederic and Davidson, Irwin and Tora, Laszlo}, month = mar, year = {2011}, pages = {e35--e35}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4RHT8HUV/Ye et al. - 2011 - seqMINER an integrated ChIP-seq data interpretati.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UCQ448QG/2411697.html:text/html} } @article{castro-mondragon_rsat_2017, title = {{RSAT} matrix-clustering: dynamic exploration and redundancy reduction of transcription factor binding motif collections}, volume = {45}, issn = {0305-1048}, shorttitle = {{RSAT} matrix-clustering}, url = {https://academic.oup.com/nar/article/45/13/e119/3862068}, doi = {10.1093/nar/gkx314}, abstract = {Transcription factor (TF) databases contain multitudes of binding motifs (TFBMs) from various sources, from which non-redundant collections are derived by manual curation. The advent of high-throughput methods stimulated the production of novel collections with increasing numbers of motifs. Meta-databases, built by merging these collections, contain redundant versions, because available tools are not suited to automatically identify and explore biologically relevant clusters among thousands of motifs. Motif discovery from genome-scale data sets (e.g. ChIP-seq) also produces redundant motifs, hampering the interpretation of results. We present matrix-clustering, a versatile tool that clusters similar TFBMs into multiple trees, and automatically creates non-redundant TFBM collections. A feature unique to matrix-clustering is its dynamic visualisation of aligned TFBMs, and its capability to simultaneously treat multiple collections from various sources. We demonstrate that matrix-clustering considerably simplifies the interpretation of combined results from multiple motif discovery tools, and highlights biologically relevant variations of similar motifs. 
We also ran a large-scale application to cluster ∼11 000 motifs from 24 entire databases, showing that matrix-clustering correctly groups motifs belonging to the same TF families, and drastically reduced motif redundancy. matrix-clustering is integrated within the RSAT suite (http://rsat.eu/), accessible through a user-friendly web interface or command-line for its integration in pipelines.}, language = {en}, number = {13}, urldate = {2018-03-21}, journal = {Nucleic Acids Research}, author = {Castro-Mondragon, Jaime Abraham and Jaeger, Sébastien and Thieffry, Denis and Thomas-Chollier, Morgane and van Helden, Jacques}, month = jul, year = {2017}, pages = {e119--e119}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5ZSJZ2JK/Castro-Mondragon et al. - 2017 - RSAT matrix-clustering dynamic exploration and re.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/X54R4KMJ/3862068.html:text/html} } @article{li_cd-hit:_2006, title = {Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences}, volume = {22}, issn = {1367-4803}, shorttitle = {Cd-hit}, url = {https://academic.oup.com/bioinformatics/article/22/13/1658/194225}, doi = {10.1093/bioinformatics/btl158}, abstract = {Motivation: In 2001 and 2002, we published two papers (Bioinformatics, 17, 282–283, Bioinformatics, 18, 77–82) describing an ultrafast protein sequence clustering program called cd-hit. This program can efficiently cluster a huge protein database with millions of sequences. However, the applications of the underlying algorithm are not limited to only protein sequences clustering, here we present several new programs using the same algorithm including cd-hit-2d, cd-hit-est and cd-hit-est-2d. Cd-hit-2d compares two protein datasets and reports similar matches between them; cd-hit-est clusters a DNA/RNA sequence database and cd-hit-est-2d compares two nucleotide datasets. 
All these programs can handle huge datasets with millions of sequences and can be hundreds of times faster than methods based on the popular sequence comparison and database search tools, such as BLAST.Availability:Contact:liwz@sdsc.edu}, language = {en}, number = {13}, urldate = {2018-03-21}, journal = {Bioinformatics}, author = {Li, Weizhong and Godzik, Adam}, month = jul, year = {2006}, pages = {1658--1659}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8PTNE6P7/Li et Godzik - 2006 - Cd-hit a fast program for clustering and comparin.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/C8G49ZEK/194225.html:text/html} } @article{bucher_weight_1990, title = {Weight matrix descriptions of four eukaryotic {RNA} polymerase {II} promoter elements derived from 502 unrelated promoter sequences}, volume = {212}, issn = {0022-2836}, url = {http://www.sciencedirect.com/science/article/pii/0022283690902239}, doi = {10.1016/0022-2836(90)90223-9}, abstract = {Optimized weight matrices defining four major eukaryotic promoter elements, the TATA-box, cap signal, CCAAT-, and GC-box, are presented; they were derived by comparative sequence analysis of 502 unrelated RNA polymerase II promoter regions. The new TATA-box and cap signal descriptions differ in several respects from the only hitherto available base frequency Tables. The CCAAT-box matrix, obtained with no prior assumption but CCAAT being the core of the motif, reflects precisely the sequence specificity of the recently discovered nuclear factor NY-I/CP1 but does not include typical recognition sequences of two other purported CCAAT-binding proteins, CTF and CBP. The GC-box description is longer than the previously proposed consensus sequences but is consistent with Sp1 protein-DNA binding data. The notion of a CACCC element distinct from the GC-box seems not to be justified any longer in view of the new weight matrix. 
Unlike the two fixed-distance elements, neither the CCAAT- nor the GC-box occurs at significantly high frequency in the upstream regions of non-vertebrate genes. Preliminary attempts to predict promoters with the aid of the new signal descriptions were unexpectedly successful. The new TATA-box matrix locates eukaryotic transcription initiation sites as reliably as do the best currently available methods to map Escherichia coli promoters. This analysis was made possible by the recently established Eukaryotic Promoter Database (EPD) of the EMBL Nucleotide Sequence Data Library. In order to derive the weight matrices, a novel algorithm has been devised that is generally applicable to sequence motifs positionally correlated with a biologically defined position in the sequences. The signal must be sufficiently over-represented in a particular region relative to the given site, but need not be present in all members of the input sequence collection. The algorithm iteratively redefines the set of putative motif representatives from which a weight matrix is derived, so as to maximize a quantitative measure of local over-representation, an optimization criterion that naturally combines structural and positional constancy. 
A comprehensive description of the technique is presented in Methods and Data.}, number = {4}, journal = {Journal of Molecular Biology}, author = {Bucher, Philipp}, month = apr, year = {1990}, pages = {563--578}, file = {Bucher90.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Bucher90.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KSDSN7JT/0022283690902239.html:text/html} } @article{buenrostro_transposition_2013, title = {Transposition of native chromatin for fast and sensitive epigenomic profiling of open chromatin, {DNA}-binding proteins and nucleosome position}, volume = {10}, copyright = {2013 Nature Publishing Group}, issn = {1548-7105}, url = {https://www.nature.com/articles/nmeth.2688}, doi = {10.1038/nmeth.2688}, abstract = {We describe an assay for transposase-accessible chromatin using sequencing (ATAC-seq), based on direct in vitro transposition of sequencing adaptors into native chromatin, as a rapid and sensitive method for integrative epigenomic analysis. ATAC-seq captures open chromatin sites using a simple two-step protocol with 500–50,000 cells and reveals the interplay between genomic locations of open chromatin, DNA-binding proteins, individual nucleosomes and chromatin compaction at nucleotide resolution. We discovered classes of DNA-binding factors that strictly avoided, could tolerate or tended to overlap with nucleosomes. Using ATAC-seq maps of human CD4+ T cells from a proband obtained on consecutive days, we demonstrated the feasibility of analyzing an individual's epigenome on a timescale compatible with clinical decision-making.}, language = {en}, number = {12}, urldate = {2018-03-23}, journal = {Nature Methods}, author = {Buenrostro, Jason D. and Giresi, Paul G. and Zaba, Lisa C. and Chang, Howard Y. and Greenleaf, William J.}, month = dec, year = {2013}, pages = {1213--1218}, file = {Buenrostro et al. 
- 2013 - Supplemental.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/A62IVVST/Buenrostro et al. - 2013 - Supplemental.pdf:application/pdf;Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/A62IVVST/Buenrostro et al. - 2013 - Transposition of native chromatin for fast and sen.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/B4S85K35/nmeth.html:text/html} } @article{duttke_human_2015-1, title = {Human {Promoters} {Are} {Intrinsically} {Directional}}, volume = {57}, issn = {1097-2765}, url = {http://www.cell.com/molecular-cell/abstract/S1097-2765(14)01007-7}, doi = {10.1016/j.molcel.2014.12.029}, language = {English}, number = {4}, urldate = {2018-04-03}, journal = {Molecular Cell}, author = {Duttke, Sascha H. C. and Lacadie, Scott A. and Ibrahim, Mahmoud M. and Glass, Christopher K. and Corcoran, David L. and Benner, Christopher and Heinz, Sven and Kadonaga, James T. and Ohler, Uwe}, month = feb, year = {2015}, pmid = {25639469}, pages = {674--684}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/72BCMJ2J/Duttke et al. - 2015 - Human Promoters Are Intrinsically Directional.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/K2P9ZEZV/S1097-2765(14)01007-7.html:text/html} } @article{tehranchi_pooled_2016, title = {Pooled {ChIP}-{Seq} {Links} {Variation} in {Transcription} {Factor} {Binding} to {Complex} {Disease} {Risk}}, volume = {165}, issn = {0092-8674, 1097-4172}, url = {http://www.cell.com/cell/abstract/S0092-8674(16)30339-7}, doi = {10.1016/j.cell.2016.03.041}, language = {English}, number = {3}, urldate = {2018-04-11}, journal = {Cell}, author = {Tehranchi, Ashley K. and Myrthil, Marsha and Martin, Trevor and Hie, Brian L. 
and Golan, David and Fraser, Hunter B.}, month = apr, year = {2016}, pmid = {27087447}, pages = {730--741}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/DIHDRW7A/Tehranchi et al. - 2016 - Pooled ChIP-Seq Links Variation in Transcription F.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/WW7SSNNZ/S0092-8674(16)30339-7.html:text/html} } @article{stormo_identifying_1989, title = {Identifying protein-binding sites from unaligned {DNA} fragments.}, volume = {86}, issn = {0027-8424}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC286650/}, abstract = {The ability to determine important features within DNA sequences from the sequences alone is becoming essential as large-scale sequencing projects are being undertaken. We present a method that can be applied to the problem of identifying the recognition pattern for a DNA-binding protein given only a collection of sequenced DNA fragments, each known to contain somewhere within it a binding site for that protein. Information about the position or orientation of the binding sites within those fragments is not needed. The method compares the "information content" of a large number of possible binding site alignments to arrive at a matrix representation of the binding site pattern. The specificity of the protein is represented as a matrix, rather than a consensus sequence, allowing patterns that are typical of regulatory protein-binding sites to be identified. The reliability of the method improves as the number of sequences increases, but the time required increases only linearly with the number of sequences. 
An example, using known cAMP receptor protein-binding sites, illustrates the method.}, number = {4}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, author = {Stormo, G D and Hartzell, G W}, month = feb, year = {1989}, pmid = {2919167}, pmcid = {PMC286650}, pages = {1183--1187}, file = {PubMed Central Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/P8H57Z5V/Stormo et Hartzell - 1989 - Identifying protein-binding sites from unaligned D.pdf:application/pdf} } @article{li_sequence_2002, title = {Sequence clustering strategies improve remote homology recognitions while reducing search times}, volume = {15}, issn = {1741-0126}, url = {https://academic.oup.com/peds/article/15/8/643/1532107}, doi = {10.1093/protein/15.8.643}, abstract = {Abstract. Sequence databases are rapidly growing, thereby increasing the coverage of protein sequence space, but this coverage is uneven because most sequencin}, language = {en}, number = {8}, urldate = {2018-06-21}, journal = {Protein Engineering, Design and Selection}, author = {Li, Weizhong and Jaroszewski, Lukasz and Godzik, Adam}, month = aug, year = {2002}, pages = {643--649}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8UE54UA8/Li et al. 
- 2002 - Sequence clustering strategies improve remote homo.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/D343QNAZ/1532107.html:text/html} } @article{schaid_genome-wide_2018, title = {From genome-wide associations to candidate causal variants by statistical fine-mapping}, volume = {19}, copyright = {2018 Macmillan Publishers Ltd., part of Springer Nature}, issn = {1471-0064}, url = {https://www.nature.com/articles/s41576-018-0016-z}, doi = {10.1038/s41576-018-0016-z}, abstract = {Fine-mapping is the process by which a trait-associated region from a genome-wide association study (GWAS) is analysed to identify the particular genetic variants that are likely to causally influence the examined trait. This Review discusses the diverse statistical approaches to fine-mapping and their foundations, strengths and limitations, including integration of trans-ethnic human population data and functional annotations.}, language = {en}, number = {8}, urldate = {2018-07-17}, journal = {Nature Reviews Genetics}, author = {Schaid, Daniel J. and Chen, Wenan and Larson, Nicholas B.}, month = aug, year = {2018}, pages = {491--504}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9DTQSD64/Schaid et al. - 2018 - From genome-wide associations to candidate causal .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HF3NTMN5/s41576-018-0016-z.html:text/html} } @article{andreatta_gibbscluster:_2017, title = {{GibbsCluster}: unsupervised clustering and alignment of peptide sequences}, volume = {45}, issn = {0305-1048}, shorttitle = {{GibbsCluster}}, url = {https://academic.oup.com/nar/article/45/W1/W458/3605637}, doi = {10.1093/nar/gkx248}, abstract = {Abstract. Receptor interactions with short linear peptide fragments (ligands) are at the base of many biological signaling processes. 
Conserved and information}, language = {en}, number = {W1}, urldate = {2018-08-07}, journal = {Nucleic Acids Research}, author = {Andreatta, Massimo and Alvarez, Bruno and Nielsen, Morten}, month = jul, year = {2017}, pages = {W458--W463}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GVGDMPB9/Andreatta et al. - 2017 - GibbsCluster unsupervised clustering and alignmen.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TFSVZ862/3605637.html:text/html} } @article{andreatta_simultaneous_2013, title = {Simultaneous alignment and clustering of peptide data using a {Gibbs} sampling approach}, volume = {29}, issn = {1367-4803}, url = {https://academic.oup.com/bioinformatics/article/29/1/8/272260}, doi = {10.1093/bioinformatics/bts621}, abstract = {Abstract. Motivation: Proteins recognizing short peptide fragments play a central role in cellular signaling. As a result of high-throughput technologies, pept}, language = {en}, number = {1}, urldate = {2018-08-08}, journal = {Bioinformatics}, author = {Andreatta, Massimo and Lund, Ole and Nielsen, Morten}, month = jan, year = {2013}, pages = {8--14}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UHJ8FJM4/Andreatta et al. - 2013 - Simultaneous alignment and clustering of peptide d.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/2ZQ6BW86/272260.html:text/html} } @article{lawrence_detecting_1993, title = {Detecting subtle sequence signals: a {Gibbs} sampling strategy for multiple alignment}, volume = {262}, copyright = {© 1993}, issn = {0036-8075, 1095-9203}, shorttitle = {Detecting subtle sequence signals}, url = {http://science.sciencemag.org/content/262/5131/208}, doi = {10.1126/science.8211139}, abstract = {A wealth of protein and DNA sequence data is being generated by genome projects and other sequencing efforts. 
A crucial barrier to deciphering these sequences and understanding the relations among them is the difficulty of detecting subtle local residue patterns common to multiple sequences. Such patterns frequently reflect similar molecular structures and biological properties. A mathematical definition of this "local multiple alignment" problem suitable for full computer automation has been used to develop a new and sensitive algorithm, based on the statistical method of iterative sampling. This algorithm finds an optimized local alignment model for N sequences in N-linear time, requiring only seconds on current workstations, and allows the simultaneous detection and optimization of multiple patterns and pattern repeats. The method is illustrated as applied to helix-turn-helix proteins, lipocalins, and prenyltransferases.}, language = {en}, number = {5131}, urldate = {2018-08-15}, journal = {Science}, author = {Lawrence, C. E. and Altschul, S. F. and Boguski, M. S. and Liu, J. S. and Neuwald, A. F. and Wootton, J. C.}, month = oct, year = {1993}, pmid = {8211139}, pages = {208--214}, file = {Lawrence93.pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Lawrence93.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/327XWVSX/208.html:text/html} } @article{rosenbloom_encode_2013, title = {{ENCODE} {Data} in the {UCSC} {Genome} {Browser}: year 5 update}, volume = {41}, issn = {0305-1048}, shorttitle = {{ENCODE} {Data} in the {UCSC} {Genome} {Browser}}, url = {https://academic.oup.com/nar/article/41/D1/D56/1066727}, doi = {10.1093/nar/gks1172}, abstract = {Abstract. The Encyclopedia of DNA Elements (ENCODE), http://encodeproject.org, has completed its fifth year of scientific collaboration to create a comprehensi}, language = {en}, number = {D1}, urldate = {2018-09-24}, journal = {Nucleic Acids Research}, author = {Rosenbloom, Kate R. and Sloan, Cricket A. and Malladi, Venkat S. and Dreszer, Timothy R. 
and Learned, Katrina and Kirkup, Vanessa M. and Wong, Matthew C. and Maddren, Morgan and Fang, Ruihua and Heitner, Steven G. and Lee, Brian T. and Barber, Galt P. and Harte, Rachel A. and Diekhans, Mark and Long, Jeffrey C. and Wilder, Steven P. and Zweig, Ann S. and Karolchik, Donna and Kuhn, Robert M. and Haussler, David and Kent, W. James}, month = jan, year = {2013}, pages = {D56--D63}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HXMKXF6F/Rosenbloom et al. - 2013 - ENCODE Data in the UCSC Genome Browser year 5 upd.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UV7N94N9/1066727.html:text/html} } @article{bailey_meme_2009, title = {{MEME} {Suite}: tools for motif discovery and searching}, volume = {37}, issn = {0305-1048}, shorttitle = {{MEME} {Suite}}, url = {https://academic.oup.com/nar/article/37/suppl_2/W202/1135092}, doi = {10.1093/nar/gkp335}, abstract = {Abstract. The MEME Suite web server provides a unified portal for online discovery and analysis of sequence motifs representing features such as DNA binding si}, language = {en}, number = {suppl\_2}, urldate = {2018-09-25}, journal = {Nucleic Acids Research}, author = {Bailey, Timothy L. and Boden, Mikael and Buske, Fabian A. and Frith, Martin and Grant, Charles E. and Clementi, Luca and Ren, Jingyuan and Li, Wilfred W. and Noble, William S.}, month = jul, year = {2009}, pages = {W202--W208}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FSJB5Q2B/Bailey et al. 
- 2009 - MEME Suite tools for motif discovery and searchin.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/84WXK6RS/1135092.html:text/html} } @article{dreos_mga_2018, title = {{MGA} repository: a curated data resource for {ChIP}-seq and other genome annotated data}, volume = {46}, issn = {0305-1048}, shorttitle = {{MGA} repository}, url = {https://academic.oup.com/nar/article/46/D1/D175/4563313}, doi = {10.1093/nar/gkx995}, abstract = {Abstract. The Mass Genome Annotation (MGA) repository is a resource designed to store published next generation sequencing data and other genome annotation dat}, language = {en}, number = {D1}, urldate = {2018-10-02}, journal = {Nucleic Acids Research}, author = {Dreos, René and Ambrosini, Giovanna and Groux, Romain and Périer, Rouayda Cavin and Bucher, Philipp}, month = jan, year = {2018}, pages = {D175--D180}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/B3GGR38V/Dréos et al. - 2018 - MGA repository a curated data resource for ChIP-s.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9VUC447P/4563313.html:text/html} } @article{ambrosini_pwmscan:_2018, title = {{PWMScan}: a fast tool for scanning entire genomes with a position-specific weight matrix}, volume = {34}, issn = {1367-4803}, shorttitle = {{PWMScan}}, url = {https://academic.oup.com/bioinformatics/article/34/14/2483/4921176}, doi = {10.1093/bioinformatics/bty127}, abstract = {Abstract. Summary: Transcription factors regulate gene expression by binding to specific short DNA sequences of 5–20 bp to regulate the rate of transcription of}, language = {en}, number = {14}, urldate = {2018-10-31}, journal = {Bioinformatics}, author = {Ambrosini, Giovanna and Groux, Romain and Bucher, Philipp}, month = jul, year = {2018}, pages = {2483--2484}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7BG8Z46H/Ambrosini et al.
- 2018 - PWMScan a fast tool for scanning entire genomes w.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KDHTMW7B/4921176.html:text/html} } @article{kulakovskiy_hocomoco:_2018, title = {{HOCOMOCO}: towards a complete collection of transcription factor binding models for human and mouse via large-scale {ChIP}-{Seq} analysis}, volume = {46}, issn = {0305-1048}, shorttitle = {{HOCOMOCO}}, url = {https://academic.oup.com/nar/article/46/D1/D252/4616875}, doi = {10.1093/nar/gkx1106}, abstract = {Abstract. We present a major update of the HOCOMOCO collection that consists of patterns describing DNA binding specificities for human and mouse transcription}, language = {en}, number = {D1}, urldate = {2018-10-31}, journal = {Nucleic Acids Research}, author = {Kulakovskiy, Ivan V. and Vorontsov, Ilya E. and Yevshin, Ivan S. and Sharipov, Ruslan N. and Fedorova, Alla D. and Rumynskiy, Eugene I. and Medvedeva, Yulia A. and Magana-Mora, Arturo and Bajic, Vladimir B. and Papatsenko, Dmitry A. and Kolpakov, Fedor A. and Makeev, Vsevolod J.}, month = jan, year = {2018}, pages = {D252--D259}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/M43KTC5T/Kulakovskiy et al. - 2018 - HOCOMOCO towards a complete collection of transcr.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/99VQ7MZK/4616875.html:text/html} } @article{khan_jaspar_2018, title = {{JASPAR} 2018: update of the open-access database of transcription factor binding profiles and its web framework}, volume = {46}, issn = {0305-1048}, shorttitle = {{JASPAR} 2018}, url = {https://academic.oup.com/nar/article/46/D1/D260/4621338}, doi = {10.1093/nar/gkx1126}, abstract = {Abstract. 
JASPAR (http://jaspar.genereg.net) is an open-access database of curated, non-redundant transcription factor (TF)-binding profiles stored as position}, language = {en}, number = {D1}, urldate = {2018-10-31}, journal = {Nucleic Acids Research}, author = {Khan, Aziz and Fornes, Oriol and Stigliani, Arnaud and Gheorghe, Marius and Castro-Mondragon, Jaime A. and van der Lee, Robin and Bessy, Adrien and Chèneby, Jeanne and Kulkarni, Shubhada R. and Tan, Ge and Baranasic, Damir and Arenillas, David J. and Sandelin, Albin and Vandepoele, Klaas and Lenhard, Boris and Ballester, Benoît and Wasserman, Wyeth W. and Parcy, François and Mathelier, Anthony}, month = jan, year = {2018}, pages = {D260--D266}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TZUMWSCF/Khan et al. - 2018 - JASPAR 2018 update of the open-access database of.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/EUQZE2J9/4621338.html:text/html} } @article{chatr-aryamontri_biogrid_2017, title = {The {BioGRID} interaction database: 2017 update}, volume = {45}, issn = {0305-1048}, shorttitle = {The {BioGRID} interaction database}, url = {https://academic.oup.com/nar/article/45/D1/D369/2681732}, doi = {10.1093/nar/gkw1102}, abstract = {Abstract. The Biological General Repository for Interaction Datasets (BioGRID: https://thebiogrid.org) is an open access database dedicated to the annotation a}, language = {en}, number = {D1}, urldate = {2018-10-31}, journal = {Nucleic Acids Research}, author = {Chatr-aryamontri, Andrew and Oughtred, Rose and Boucher, Lorrie and Rust, Jennifer and Chang, Christie and Kolas, Nadine K. and O'Donnell, Lara and Oster, Sara and Theesfeld, Chandra and Sellam, Adnane and Stark, Chris and Breitkreutz, Bobby-Joe and Dolinski, Kara and Tyers, Mike}, month = jan, year = {2017}, pages = {D369--D379}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/V5XI5JI8/Chatr-aryamontri et al. 
- 2017 - The BioGRID interaction database 2017 update.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/R63HKK9T/2681732.html:text/html} } @article{djebali_landscape_2012, title = {Landscape of transcription in human cells}, volume = {489}, copyright = {2012 Nature Publishing Group}, issn = {1476-4687}, url = {https://www.nature.com/articles/nature11233}, doi = {10.1038/nature11233}, abstract = {Eukaryotic cells make many types of primary and processed RNAs that are found either in specific subcellular compartments or throughout the cells. A complete catalogue of these RNAs is not yet available and their characteristic subcellular localizations are also poorly understood. Because RNA represents the direct output of the genetic information encoded by genomes and a significant proportion of a cell’s regulatory capabilities are focused on its synthesis, processing, transport, modification and translation, the generation of such a catalogue is crucial for understanding genome function. Here we report evidence that three-quarters of the human genome is capable of being transcribed, as well as observations about the range and levels of expression, localization, processing fates, regulatory regions and modifications of almost all currently annotated and thousands of previously unannotated RNAs. These observations, taken together, prompt a redefinition of the concept of a gene.}, language = {en}, number = {7414}, urldate = {2018-11-12}, journal = {Nature}, author = {Djebali, Sarah and Davis, Carrie A. and Merkel, Angelika and Dobin, Alex and Lassmann, Timo and Mortazavi, Ali and Tanzer, Andrea and Lagarde, Julien and Lin, Wei and Schlesinger, Felix and Xue, Chenghai and Marinov, Georgi K. and Khatun, Jainab and Williams, Brian A. and Zaleski, Chris and Rozowsky, Joel and Röder, Maik and Kokocinski, Felix and Abdelhamid, Rehab F. and Alioto, Tyler and Antoshechkin, Igor and Baer, Michael T. and Bar, Nadav S. 
and Batut, Philippe and Bell, Kimberly and Bell, Ian and Chakrabortty, Sudipto and Chen, Xian and Chrast, Jacqueline and Curado, Joao and Derrien, Thomas and Drenkow, Jorg and Dumais, Erica and Dumais, Jacqueline and Duttagupta, Radha and Falconnet, Emilie and Fastuca, Meagan and Fejes-Toth, Kata and Ferreira, Pedro and Foissac, Sylvain and Fullwood, Melissa J. and Gao, Hui and Gonzalez, David and Gordon, Assaf and Gunawardena, Harsha and Howald, Cedric and Jha, Sonali and Johnson, Rory and Kapranov, Philipp and King, Brandon and Kingswood, Colin and Luo, Oscar J. and Park, Eddie and Persaud, Kimberly and Preall, Jonathan B. and Ribeca, Paolo and Risk, Brian and Robyr, Daniel and Sammeth, Michael and Schaffer, Lorian and See, Lei-Hoon and Shahab, Atif and Skancke, Jorgen and Suzuki, Ana Maria and Takahashi, Hazuki and Tilgner, Hagen and Trout, Diane and Walters, Nathalie and Wang, Huaien and Wrobel, John and Yu, Yanbao and Ruan, Xiaoan and Hayashizaki, Yoshihide and Harrow, Jennifer and Gerstein, Mark and Hubbard, Tim and Reymond, Alexandre and Antonarakis, Stylianos E. and Hannon, Gregory and Giddings, Morgan C. and Ruan, Yijun and Wold, Barbara and Carninci, Piero and Guigó, Roderic and Gingeras, Thomas R.}, month = sep, year = {2012}, pages = {101--108}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/C2Q2SUSC/Djebali et al. 
- 2012 - Landscape of transcription in human cells.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XJEIHF3I/nature11233.html:text/html} } @article{hesselberth_global_2009, title = {Global mapping of protein-{DNA} interactions \textit{in vivo} by digital genomic footprinting}, volume = {6}, copyright = {2009 Nature Publishing Group}, issn = {1548-7105}, url = {https://www.nature.com/articles/nmeth.1313}, doi = {10.1038/nmeth.1313}, abstract = {The orchestrated binding of transcriptional activators and repressors to specific DNA sequences in the context of chromatin defines the regulatory program of eukaryotic genomes. We developed a digital approach to assay regulatory protein occupancy on genomic DNA in vivo by dense mapping of individual DNase I cleavages from intact nuclei using massively parallel DNA sequencing. Analysis of {\textgreater}23 million cleavages across the Saccharomyces cerevisiae genome revealed thousands of protected regulatory protein footprints, enabling de novo derivation of factor binding motifs and the identification of hundreds of new binding sites for major regulators. We observed striking correspondence between single-nucleotide resolution DNase I cleavage patterns and protein-DNA interactions determined by crystallography. The data also yielded a detailed view of larger chromatin features including positioned nucleosomes flanking factor binding regions. Digital genomic footprinting should be a powerful approach to delineate the cis-regulatory framework of any organism with an available genome sequence.}, language = {en}, number = {4}, urldate = {2018-11-14}, journal = {Nature Methods}, author = {Hesselberth, Jay R. and Chen, Xiaoyu and Zhang, Zhihong and Sabo, Peter J. and Sandstrom, Richard and Reynolds, Alex P. and Thurman, Robert E. and Neph, Shane and Kuehn, Michael S. and Noble, William S. 
and Fields, Stanley and Stamatoyannopoulos, John A.}, month = apr, year = {2009}, pages = {283--289}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ZV93EHPT/Hesselberth et al. - 2009 - Global mapping of protein-DNA interactions iin v.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8G6VRRWP/nmeth.html:text/html} } @article{hon_chromasig:_2008, title = {{ChromaSig}: {A} {Probabilistic} {Approach} to {Finding} {Common} {Chromatin} {Signatures} in the {Human} {Genome}}, volume = {4}, issn = {1553-7358}, shorttitle = {{ChromaSig}}, url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000201}, doi = {10.1371/journal.pcbi.1000201}, abstract = {Computational methods to identify functional genomic elements using genetic information have been very successful in determining gene structure and in identifying a handful of cis-regulatory elements. But the vast majority of regulatory elements have yet to be discovered, and it has become increasingly apparent that their discovery will not come from using genetic information alone. Recently, high-throughput technologies have enabled the creation of information-rich epigenetic maps, most notably for histone modifications. However, tools that search for functional elements using this epigenetic information have been lacking. Here, we describe an unsupervised learning method called ChromaSig to find, in an unbiased fashion, commonly occurring chromatin signatures in both tiling microarray and sequencing data. Applying this algorithm to nine chromatin marks across a 1\% sampling of the human genome in HeLa cells, we recover eight clusters of distinct chromatin signatures, five of which correspond to known patterns associated with transcriptional promoters and enhancers. 
Interestingly, we observe that the distinct chromatin signatures found at enhancers mark distinct functional classes of enhancers in terms of transcription factor and coactivator binding. In addition, we identify three clusters of novel chromatin signatures that contain evolutionarily conserved sequences and potential cis-regulatory elements. Applying ChromaSig to a panel of 21 chromatin marks mapped genomewide by ChIP-Seq reveals 16 classes of genomic elements marked by distinct chromatin signatures. Interestingly, four classes containing enrichment for repressive histone modifications appear to be locally heterochromatic sites and are enriched in quickly evolving regions of the genome. The utility of this approach in uncovering novel, functionally significant genomic elements will aid future efforts of genome annotation via chromatin modifications.}, language = {en}, number = {10}, urldate = {2018-11-20}, journal = {PLOS Computational Biology}, author = {Hon, Gary and Ren, Bing and Wang, Wei}, month = oct, year = {2008}, keywords = {Chromatin, Transcription Factors, Sequence motif analysis, Histones, Histone modification, Chromatin modification, HeLa cells, T cells}, pages = {e1000201}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/K6CUWFD9/Hon et al. - 2008 - ChromaSig A Probabilistic Approach to Finding Com.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UJZ2MCBQ/article.html:text/html} } @inproceedings{arthur_k-means++:_2007, address = {Philadelphia, PA, USA}, series = {{SODA} '07}, title = {K-means++: {The} {Advantages} of {Careful} {Seeding}}, isbn = {978-0-89871-624-5}, shorttitle = {K-means++}, url = {http://dl.acm.org/citation.cfm?id=1283383.1283494}, abstract = {The k-means method is a widely used clustering technique that seeks to minimize the average squared distance between points in the same cluster. 
Although it offers no accuracy guarantees, its simplicity and speed are very appealing in practice. By augmenting k-means with a very simple, randomized seeding technique, we obtain an algorithm that is $\Theta(\log k)$-competitive with the optimal clustering. Preliminary experiments show that our augmentation improves both the speed and the accuracy of k-means, often quite dramatically.}, booktitle = {Proceedings of the {Eighteenth} {Annual} {ACM}-{SIAM} {Symposium} on {Discrete} {Algorithms}}, publisher = {Society for Industrial and Applied Mathematics}, author = {Arthur, David and Vassilvitskii, Sergei}, year = {2007}, pages = {1027--1035}, file = {ACM Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/62ET8WNZ/Arthur et Vassilvitskii - 2007 - K-means++ The Advantages of Careful Seeding.pdf:application/pdf} } @article{lake_integrative_2018, title = {Integrative single-cell analysis of transcriptional and epigenetic states in the human adult brain}, volume = {36}, copyright = {2017 Nature Publishing Group}, issn = {1546-1696}, url = {https://www.nature.com/articles/nbt.4038}, doi = {10.1038/nbt.4038}, abstract = {Detailed characterization of the cell types in the human brain requires scalable experimental approaches to examine multiple aspects of the molecular state of individual cells, as well as computational integration of the data to produce unified cell-state annotations. Here we report improved high-throughput methods for single-nucleus droplet-based sequencing (snDrop-seq) and single-cell transposome hypersensitive site sequencing (scTHS-seq). We used each method to acquire nuclear transcriptomic and DNA accessibility maps for {\textgreater}60,000 single cells from human adult visual cortex, frontal cortex, and cerebellum.
Integration of these data revealed regulatory elements and transcription factors that underlie cell-type distinctions, providing a basis for the study of complex processes in the brain, such as genetic programs that coordinate adult remyelination. We also mapped disease-associated risk variants to specific cellular populations, which provided insights into normal and pathogenic cellular processes in the human brain. This integrative multi-omics approach permits more detailed single-cell interrogation of complex organs and tissues.}, language = {en}, number = {1}, urldate = {2018-12-03}, journal = {Nature Biotechnology}, author = {Lake, Blue B. and Chen, Song and Sos, Brandon C. and Fan, Jean and Kaeser, Gwendolyn E. and Yung, Yun C. and Duong, Thu E. and Gao, Derek and Chun, Jerold and Kharchenko, Peter V. and Zhang, Kun}, month = jan, year = {2018}, pages = {70--80}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HKGMDM7S/Lake et al. - 2018 - Integrative single-cell analysis of transcriptiona.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Z795VVN2/nbt.html:text/html} } @article{gardeux_asap:_2017, title = {{ASAP}: a web-based platform for the analysis and interactive visualization of single-cell {RNA}-seq data}, volume = {33}, issn = {1367-4803}, shorttitle = {{ASAP}}, url = {https://academic.oup.com/bioinformatics/article/33/19/3123/3852081}, doi = {10.1093/bioinformatics/btx337}, abstract = {Abstract. Motivation: Single-cell RNA-sequencing (scRNA-seq) allows whole transcriptome profiling of thousands of individual cells, enabling the molecular explor}, language = {en}, number = {19}, urldate = {2018-12-03}, journal = {Bioinformatics}, author = {Gardeux, Vincent and David, Fabrice P. A. and Shajkofci, Adrian and Schwalie, Petra C.
and Deplancke, Bart}, month = oct, year = {2017}, pages = {3123--3125}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GCZ8VF92/Gardeux et al. - 2017 - ASAP a web-based platform for the analysis and in.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/I8CFAWH8/3852081.html:text/html} } @article{conway_xenometool_2012, title = {Xenome—a tool for classifying reads from xenograft samples}, volume = {28}, issn = {1367-4803}, url = {https://academic.oup.com/bioinformatics/article/28/12/i172/269972}, doi = {10.1093/bioinformatics/bts236}, abstract = {Abstract. Motivation: Shotgun sequence read data derived from xenograft material contains a mixture of reads arising from the host and reads arising from the g}, language = {en}, number = {12}, urldate = {2018-12-04}, journal = {Bioinformatics}, author = {Conway, Thomas and Wazny, Jeremy and Bromage, Andrew and Tymms, Martin and Sooraj, Dhanya and Williams, Elizabeth D. and Beresford-Smith, Bryan}, month = jun, year = {2012}, pages = {i172--i178}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6IWBVCKR/Conway et al. 
- 2012 - Xenome—a tool for classifying reads from xenograft.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/E48ISGNC/269972.html:text/html} } @article{ernst_chromhmm:_2012, title = {{ChromHMM}: automating chromatin-state discovery and characterization}, volume = {9}, copyright = {2012 Nature Publishing Group}, issn = {1548-7105}, shorttitle = {{ChromHMM}}, url = {https://www.nature.com/articles/nmeth.1906}, doi = {10.1038/nmeth.1906}, abstract = {ChromHMM: automating chromatin-state discovery and characterization}, language = {en}, number = {3}, urldate = {2018-12-04}, journal = {Nature Methods}, author = {Ernst, Jason and Kellis, Manolis}, month = mar, year = {2012}, pages = {215--216}, file = {Ernst et Kellis - 2012 supplementals .pdf:/home/romaingroux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ADS9BET6/Ernst et Kellis - 2012 supplementals .pdf:application/pdf;Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ADS9BET6/Ernst et Kellis - 2012 - ChromHMM automating chromatin-state discovery and.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7PGXJ6UM/nmeth.html:text/html} } @article{hwang_single-cell_2018, title = {Single-cell {RNA} sequencing technologies and bioinformatics pipelines}, volume = {50}, copyright = {2018 The Author(s)}, issn = {2092-6413}, url = {https://www.nature.com/articles/s12276-018-0071-8}, doi = {10.1038/s12276-018-0071-8}, abstract = {Showing which genes are expressed, or switched on, in individual cells may help to reveal the first signs of disease. Each cell in an organism contains the same genetic information, but cell type and behavior depend on which genes are expressed. Previously, researchers could only sequence cells in batches, averaging the results, but technological improvements now allow sequencing of the genes expressed in an individual cell, known as single-cell RNA sequencing (scRNA-seq). 
Ji Hyun Lee (Kyung Hee University, Seoul) and Duhee Bang and Byungjin Hwang (Yonsei University, Seoul) have reviewed the available scRNA-seq technologies and the strategies available to analyze the large quantities of data produced. They conclude that scRNA-seq will impact both basic and medical science, from illuminating drug resistance in cancer to revealing the complex pathways of cell differentiation during development.}, language = {En}, number = {8}, urldate = {2018-12-04}, journal = {Experimental \& Molecular Medicine}, author = {Hwang, Byungjin and Lee, Ji Hyun and Bang, Duhee}, month = aug, year = {2018}, pages = {96}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MISMSQTX/Hwang et al. - 2018 - Single-cell RNA sequencing technologies and bioinf.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/STGSTEGK/s12276-018-0071-8.html:text/html} } @article{angerer_single_2017, series = {Big data acquisition and analysis • {Pharmacology} and drug discovery}, title = {Single cells make big data: {New} challenges and opportunities in transcriptomics}, volume = {4}, issn = {2452-3100}, shorttitle = {Single cells make big data}, url = {http://www.sciencedirect.com/science/article/pii/S245231001730077X}, doi = {10.1016/j.coisb.2017.07.004}, abstract = {Recent technological advances have enabled unprecedented insight into transcriptomics at the level of single cells. Single cell transcriptomics enables the measurement of transcriptomic information of thousands of single cells in a single experiment. The volume and complexity of resulting data make it a paradigm of big data. Consequently, the field is presented with new scientific and, in particular, analytical challenges where currently no scalable solutions exist. At the same time, exciting opportunities arise from increased resolution of single-cell RNA sequencing data and improved statistical power of ever growing datasets. 
Big single cell RNA sequencing data promises valuable insights into cellular heterogeneity which may significantly improve our understanding of biology and human disease. This review focuses on single cell transcriptomics and highlights the inherent opportunities and challenges in the context of big data analytics.}, journal = {Current Opinion in Systems Biology}, author = {Angerer, Philipp and Simon, Lukas and Tritschler, Sophie and Wolf, F. Alexander and Fischer, David and Theis, Fabian J.}, month = aug, year = {2017}, keywords = {Machine learning, Single-cell RNA-seq, Big data, Single-cell transcriptomics}, pages = {85--91}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/62SIJ6H6/Angerer et al. - 2017 - Single cells make big data New challenges and opp.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/VA9KMV8I/S245231001730077X.html:text/html} } @article{poirion_single-cell_2016, title = {Single-{Cell} {Transcriptomics} {Bioinformatics} and {Computational} {Challenges}}, volume = {7}, issn = {1664-8021}, url = {https://www.frontiersin.org/articles/10.3389/fgene.2016.00163/full}, doi = {10.3389/fgene.2016.00163}, abstract = {The emerging single-cell RNA-Seq (scRNA-Seq) technology holds the promise to revolutionize our understanding of diseases and associated biological processes at an unprecedented resolution. It opens the door to reveal the intercellular heterogeneity and has been employed to a variety of applications, ranging from characterizing cancer cells subpopulations to elucidating tumor resistance mechanisms. Parallel to improving experimental protocols to deal with technological issues, deriving new analytical methods to reveal the complexity in scRNA-Seq data is just as challenging. 
Here we review the current state-of-the-art bioinformatics tools and methods for scRNA-Seq analysis, as well as addressing some critical analytical challenges that the field faces.}, language = {English}, urldate = {2018-12-04}, journal = {Frontiers in Genetics}, author = {Poirion, Olivier B. and Zhu, Xun and Ching, Travers and Garmire, Lana}, year = {2016}, keywords = {bioinformatics, single-cell genomics, Single-Cell Analysis, heterogeneity, microevolution}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/J8JGKVTK/Poirion et al. - 2016 - Single-Cell Transcriptomics Bioinformatics and Com.pdf:application/pdf} } @article{guo_sincera:_2015, title = {{SINCERA}: {A} {Pipeline} for {Single}-{Cell} {RNA}-{Seq} {Profiling} {Analysis}}, volume = {11}, issn = {1553-7358}, shorttitle = {{SINCERA}}, url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004575}, doi = {10.1371/journal.pcbi.1004575}, abstract = {A major challenge in developmental biology is to understand the genetic and cellular processes/programs driving organ formation and differentiation of the diverse cell types that comprise the embryo. While recent studies using single cell transcriptome analysis illustrate the power to measure and understand cellular heterogeneity in complex biological systems, processing large amounts of RNA-seq data from heterogeneous cell populations creates the need for readily accessible tools for the analysis of single-cell RNA-seq (scRNA-seq) profiles. The present study presents a generally applicable analytic pipeline (SINCERA: a computational pipeline for SINgle CEll RNA-seq profiling Analysis) for processing scRNA-seq data from a whole organ or sorted cells. The pipeline supports the analysis for: 1) the distinction and identification of major cell types; 2) the identification of cell type specific gene signatures; and 3) the determination of driving forces of given cell types. 
We applied this pipeline to the RNA-seq analysis of single cells isolated from embryonic mouse lung at E16.5. Through the pipeline analysis, we distinguished major cell types of fetal mouse lung, including epithelial, endothelial, smooth muscle, pericyte, and fibroblast-like cell types, and identified cell type specific gene signatures, bioprocesses, and key regulators. SINCERA is implemented in R, licensed under the GNU General Public License v3, and freely available from CCHMC PBGE website, https://research.cchmc.org/pbge/sincera.html.}, language = {en}, number = {11}, urldate = {2018-12-05}, journal = {PLOS Computational Biology}, author = {Guo, Minzhe and Wang, Hui and Potter, S. Steven and Whitsett, Jeffrey A. and Xu, Yan}, month = nov, year = {2015}, keywords = {Gene regulation, Gene expression, Centrality, Epithelial cells, Lung development, Gene prediction, Transcriptional control, Regulator genes}, pages = {e1004575}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RX9EBPZX/Guo et al. - 2015 - SINCERA A Pipeline for Single-Cell RNA-Seq Profil.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ZPINKUFR/article.html:text/html} } @article{satija_spatial_2015, title = {Spatial reconstruction of single-cell gene expression data}, volume = {33}, copyright = {2015 Nature Publishing Group}, issn = {1546-1696}, url = {https://www.nature.com/articles/nbt.3192}, doi = {10.1038/nbt.3192}, abstract = {Spatial localization is a key determinant of cellular fate and behavior, but methods for spatially resolved, transcriptome-wide gene expression profiling across complex tissues are lacking. RNA staining methods assay only a small number of transcripts, whereas single-cell RNA-seq, which measures global gene expression, separates cells from their native spatial context. 
Here we present Seurat, a computational strategy to infer cellular localization by integrating single-cell RNA-seq data with in situ RNA patterns. We applied Seurat to spatially map 851 single cells from dissociated zebrafish (Danio rerio) embryos and generated a transcriptome-wide map of spatial patterning. We confirmed Seurat's accuracy using several experimental approaches, then used the strategy to identify a set of archetypal expression patterns and spatial markers. Seurat correctly localizes rare subpopulations, accurately mapping both spatially restricted and scattered groups. Seurat will be applicable to mapping cellular localization within complex patterned tissues in diverse systems.}, language = {en}, number = {5}, urldate = {2018-12-05}, journal = {Nature Biotechnology}, author = {Satija, Rahul and Farrell, Jeffrey A. and Gennert, David and Schier, Alexander F. and Regev, Aviv}, month = may, year = {2015}, pages = {495--502}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IPFURHTU/Satija et al. - 2015 - Spatial reconstruction of single-cell gene express.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Q4FSVTQ5/nbt.html:text/html} } @article{finak_mast:_2015, title = {{MAST}: a flexible statistical framework for assessing transcriptional changes and characterizing heterogeneity in single-cell {RNA} sequencing data}, volume = {16}, issn = {1474-760X}, shorttitle = {{MAST}}, url = {https://doi.org/10.1186/s13059-015-0844-5}, doi = {10.1186/s13059-015-0844-5}, abstract = {Single-cell transcriptomics reveals gene expression heterogeneity but suffers from stochastic dropout and characteristic bimodal expression distributions in which expression is either strongly non-zero or non-detectable. We propose a two-part, generalized linear model for such bimodal data that parameterizes both of these features. 
We argue that the cellular detection rate, the fraction of genes expressed in a cell, should be adjusted for as a source of nuisance variation. Our model provides gene set enrichment analysis tailored to single-cell data. It provides insights into how networks of co-expressed genes evolve across an experimental treatment. MAST is available at https://github.com/RGLab/MAST.}, number = {1}, journal = {Genome Biology}, author = {Finak, Greg and McDavid, Andrew and Yajima, Masanao and Deng, Jingyuan and Gersuk, Vivian and Shalek, Alex K. and Slichter, Chloe K. and Miller, Hannah W. and McElrath, M. Juliana and Prlic, Martin and Linsley, Peter S. and Gottardo, Raphael}, month = dec, year = {2015}, pages = {278}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/F86SBQ72/Finak et al. - 2015 - MAST a flexible statistical framework for assessi.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KMMCHHVM/s13059-015-0844-5.html:text/html} } @article{fan_characterizing_2016, title = {Characterizing transcriptional heterogeneity through pathway and gene set overdispersion analysis}, volume = {13}, copyright = {2016 Nature Publishing Group}, issn = {1548-7105}, url = {https://www.nature.com/articles/nmeth.3734}, doi = {10.1038/nmeth.3734}, abstract = {The transcriptional state of a cell reflects a variety of biological factors, from cell-type-specific features to transient processes such as the cell cycle, all of which may be of interest. However, identifying such aspects from noisy single-cell RNA-seq data remains challenging. We developed pathway and gene set overdispersion analysis (PAGODA) to resolve multiple, potentially overlapping aspects of transcriptional heterogeneity by testing gene sets for coordinated variability among measured cells.}, language = {en}, number = {3}, urldate = {2018-12-05}, journal = {Nature Methods}, author = {Fan, Jean and Salathia, Neeraj and Liu, Rui and Kaeser, Gwendolyn E. 
and Yung, Yun C. and Herman, Joseph L. and Kaper, Fiona and Fan, Jian-Bing and Zhang, Kun and Chun, Jerold and Kharchenko, Peter V.}, month = mar, year = {2016}, pages = {241--244}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IHVKTAAS/Fan et al. - 2016 - Characterizing transcriptional heterogeneity throu.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7ZP93GHG/nmeth.html:text/html} } @article{kiselev_sc3:_2017, title = {{SC}3: consensus clustering of single-cell {RNA}-seq data}, volume = {14}, copyright = {2017 Nature Publishing Group}, issn = {1548-7105}, shorttitle = {{SC}3}, url = {https://www.nature.com/articles/nmeth.4236}, doi = {10.1038/nmeth.4236}, abstract = {Single-cell RNA-seq enables the quantitative characterization of cell types based on global transcriptome profiles. We present single-cell consensus clustering (SC3), a user-friendly tool for unsupervised clustering, which achieves high accuracy and robustness by combining multiple clustering solutions through a consensus approach (http://bioconductor.org/packages/SC3). We demonstrate that SC3 is capable of identifying subclones from the transcriptomes of neoplastic cells collected from patients.}, language = {en}, number = {5}, urldate = {2018-12-05}, journal = {Nature Methods}, author = {Kiselev, Vladimir Yu and Kirschner, Kristina and Schaub, Michael T. and Andrews, Tallulah and Yiu, Andrew and Chandra, Tamir and Natarajan, Kedar N. and Reik, Wolf and Barahona, Mauricio and Green, Anthony R. and Hemberg, Martin}, month = may, year = {2017}, pages = {483--486}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RJHM5C3T/Kiselev et al. - 2017 - SC3 consensus clustering of single-cell RNA-seq d.pdf:application/pdf;Kiselev et al. - 2017 - Supplementals.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/62HT7SUC/Kiselev et al. 
- 2017 - Supplementals.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/FKGFKJWC/nmeth.html:text/html} } @article{gonzalez-blas_cis-topic_2018, title = {Cis-topic modelling of single cell epigenomes}, copyright = {© 2018, Posted by Cold Spring Harbor Laboratory. The copyright holder for this pre-print is the author. All rights reserved. The material may not be redistributed, re-used or adapted without the author's permission.}, url = {https://www.biorxiv.org/content/early/2018/07/16/370346}, doi = {10.1101/370346}, abstract = {Single-cell epigenomics provides new opportunities to decipher genomic regulatory programs from heterogeneous samples and dynamic processes. We present a probabilistic framework called cisTopic, to simultaneously discover "cis-regulatory topics" and stable cell states from sparse single-cell epigenomics data. After benchmarking cisTopic on single-cell ATAC-seq data, single-cell DNA methylation data, and semi-simulated single-cell ChIP-seq data, we use cisTopic to predict regulatory programs in the human brain and validate these by aligning them with co-expression networks derived from single-cell RNA-seq data. Next, we performed a time-series single-cell ATAC-seq experiment after SOX10 perturbations in melanoma cultures, where cisTopic revealed dynamic regulatory topics driven by SOX10 and AP-1. Finally, machine learning and enhancer modelling approaches allowed to predict cell type specific SOX10 and SOX9 binding sites based on topic specific co-regulatory motifs. 
cisTopic is available as an R/Bioconductor package at http://github.com/aertslab/cistopic.}, language = {en}, urldate = {2018-12-10}, journal = {bioRxiv}, author = {González-Blas, Carmen Bravo and Minnoye, Liesbeth and Papasokrati, Dafni and Aibar, Sara and Hulselmans, Gert and Christiaens, Valerie and Davie, Kristofer and Wouters, Jasper and Aerts, Stein}, month = jul, year = {2018}, pages = {370346}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6YTUEV8W/González-Blas et al. - 2018 - Cis-topic modelling of single cell epigenomes.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/H72ELCGI/370346.html:text/html} } @article{aibar_scenic:_2017, title = {{SCENIC}: single-cell regulatory network inference and clustering}, volume = {14}, copyright = {2017 Nature Publishing Group}, issn = {1548-7105}, shorttitle = {{SCENIC}}, url = {https://www.nature.com/articles/nmeth.4463}, doi = {10.1038/nmeth.4463}, abstract = {We present SCENIC, a computational method for simultaneous gene regulatory network reconstruction and cell-state identification from single-cell RNA-seq data (http://scenic.aertslab.org). On a compendium of single-cell data from tumors and brain, we demonstrate that cis-regulatory analysis can be exploited to guide the identification of transcription factors and cell states. 
SCENIC provides critical biological insights into the mechanisms driving cellular heterogeneity.}, language = {en}, number = {11}, urldate = {2018-12-10}, journal = {Nature Methods}, author = {Aibar, Sara and González-Blas, Carmen Bravo and Moerman, Thomas and Huynh-Thu, Vân Anh and Imrichova, Hana and Hulselmans, Gert and Rambow, Florian and Marine, Jean-Christophe and Geurts, Pierre and Aerts, Jan and van den Oord, Joost and Atak, Zeynep Kalender and Wouters, Jasper and Aerts, Stein}, month = nov, year = {2017}, pages = {1083--1086}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SXK75H8W/Aibar et al. - 2017 - SCENIC single-cell regulatory network inference a.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/LZTL22E5/nmeth.html:text/html} } @article{zerbino_ensembl_2015, title = {The {Ensembl} {Regulatory} {Build}}, volume = {16}, issn = {1465-6906}, url = {https://doi.org/10.1186/s13059-015-0621-5}, doi = {10.1186/s13059-015-0621-5}, abstract = {Most genomic variants associated with phenotypic traits or disease do not fall within gene coding regions, but in regulatory regions, rendering their interpretation difficult. We collected public data on epigenetic marks and transcription factor binding in human cell types and used it to construct an intuitive summary of regulatory regions in the human genome. We verified it against independent assays for sensitivity. The Ensembl Regulatory Build will be progressively enriched when more data is made available. It is freely available on the Ensembl browser, from the Ensembl Regulation MySQL database server and in a dedicated track hub.}, number = {1}, urldate = {2018-12-10}, journal = {Genome Biology}, author = {Zerbino, Daniel R. and Wilder, Steven P. 
and Johnson, Nathan and Juettemann, Thomas and Flicek, Paul R.}, month = mar, year = {2015}, pages = {56}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/PUUXY9Q3/Zerbino et al. - 2015 - The Ensembl Regulatory Build.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3HYDPEH5/s13059-015-0621-5.html:text/html} } @article{cao_joint_2018, title = {Joint profiling of chromatin accessibility and gene expression in thousands of single cells}, volume = {361}, copyright = {Copyright © 2018 The Authors, some rights reserved; exclusive licensee American Association for the Advancement of Science. No claim to original U.S. Government Works. http://www.sciencemag.org/about/science-licenses-journal-article-reuse This is an article distributed under the terms of the Science Journals Default License.}, issn = {0036-8075, 1095-9203}, url = {http://science.sciencemag.org/content/361/6409/1380}, doi = {10.1126/science.aau0730}, abstract = {Single-cell chromatin and RNA analysis Single-cell analyses have begun to provide insight into the differences among and within the individual cells that make up a tissue or organism. However, technological barriers owing to the small amount of material present in each single cell have prevented parallel analyses. Cao et al. present sci-CAR, a pooled barcode method that jointly analyzes both the RNA transcripts and chromatin profiles of single cells. By applying sci-CAR to lung adenocarcinoma cells and mouse kidney tissue, the authors demonstrate precision in assessing expression and genome accessibility at a genome-wide scale. The approach provides an improvement over bulk analysis, which can be confounded by differing cellular subgroups. Science, this issue p. 1380 Although we can increasingly measure transcription, chromatin, methylation, and other aspects of molecular biology at single-cell resolution, most assays survey only one aspect of cellular biology.
Here we describe sci-CAR, a combinatorial indexing–based coassay that jointly profiles chromatin accessibility and mRNA (CAR) in each of thousands of single cells. As a proof of concept, we apply sci-CAR to 4825 cells, including a time series of dexamethasone treatment, as well as to 11,296 cells from the adult mouse kidney. With the resulting data, we compare the pseudotemporal dynamics of chromatin accessibility and gene expression, reconstruct the chromatin accessibility profiles of cell types defined by RNA profiles, and link cis-regulatory sites to their target genes on the basis of the covariance of chromatin accessibility and transcription across large numbers of single cells. A technique termed sci-CAR can assess both chromatin accessibility and RNA transcription at the single-cell level.}, language = {en}, number = {6409}, urldate = {2018-12-17}, journal = {Science}, author = {Cao, Junyue and Cusanovich, Darren A. and Ramani, Vijay and Aghamirzaie, Delasa and Pliner, Hannah A. and Hill, Andrew J. and Daza, Riza M. and McFaline-Figueroa, Jose L. and Packer, Jonathan S. and Christiansen, Lena and Steemers, Frank J. and Adey, Andrew C. and Trapnell, Cole and Shendure, Jay}, month = sep, year = {2018}, pmid = {30166440}, pages = {1380--1385}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SHE8MKCM/Cao et al.
- 2018 - Joint profiling of chromatin accessibility and gen.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9JBD4YCU/1380.html:text/html} } @article{kolodziejczyk_technology_2015, title = {The {Technology} and {Biology} of {Single}-{Cell} {RNA} {Sequencing}}, volume = {58}, issn = {1097-2765}, url = {http://www.sciencedirect.com/science/article/pii/S1097276515002610}, doi = {10.1016/j.molcel.2015.04.005}, abstract = {The differences between individual cells can have profound functional consequences, in both unicellular and multicellular organisms. Recently developed single-cell mRNA-sequencing methods enable unbiased, high-throughput, and high-resolution transcriptomic analysis of individual cells. This provides an additional dimension to transcriptomic information relative to traditional methods that profile bulk populations of cells. Already, single-cell RNA-sequencing methods have revealed new biology in terms of the composition of tissues, the dynamics of transcription, and the regulatory relationships between genes. Rapid technological developments at the level of cell capture, phenotyping, molecular biology, and bioinformatics promise an exciting future with numerous biological and medical applications.}, number = {4}, urldate = {2019-01-14}, journal = {Molecular Cell}, author = {Kolodziejczyk, Aleksandra A. and Kim, Jong Kyoung and Svensson, Valentine and Marioni, John C. and Teichmann, Sarah A.}, month = may, year = {2015}, pages = {610--620}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KH6KQ2M6/Kolodziejczyk et al. 
- 2015 - The Technology and Biology of Single-Cell RNA Sequ.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3FC8TUDQ/S1097276515002610.html:text/html} } @article{islam_quantitative_2014, title = {Quantitative single-cell {RNA}-seq with unique molecular identifiers}, volume = {11}, copyright = {2013 Nature Publishing Group}, issn = {1548-7105}, url = {https://www.nature.com/articles/nmeth.2772}, doi = {10.1038/nmeth.2772}, abstract = {Single-cell RNA sequencing (RNA-seq) is a powerful tool to reveal cellular heterogeneity, discover new cell types and characterize tumor microevolution. However, losses in cDNA synthesis and bias in cDNA amplification lead to severe quantitative errors. We show that molecular labels—random sequences that label individual molecules—can nearly eliminate amplification noise, and that microfluidic sample preparation and optimized reagents produce a fivefold improvement in mRNA capture efficiency.}, language = {en}, number = {2}, urldate = {2019-01-14}, journal = {Nature Methods}, author = {Islam, Saiful and Zeisel, Amit and Joost, Simon and La Manno, Gioele and Zajac, Pawel and Kasper, Maria and Lönnerberg, Peter and Linnarsson, Sten}, month = feb, year = {2014}, pages = {163--166}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UCF3T8UV/Islam et al. 
- 2014 - Quantitative single-cell RNA-seq with unique molec.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/2YGPMH8E/nmeth.html:text/html} } @article{vallejos_normalizing_2017, title = {Normalizing single-cell {RNA} sequencing data: challenges and opportunities}, volume = {14}, copyright = {2017 Nature Publishing Group}, issn = {1548-7105}, shorttitle = {Normalizing single-cell {RNA} sequencing data}, url = {https://www.nature.com/articles/nmeth.4292}, doi = {10.1038/nmeth.4292}, abstract = {Single-cell transcriptomics is becoming an important component of the molecular biologist's toolkit. A critical step when analyzing data generated using this technology is normalization. However, normalization is typically performed using methods developed for bulk RNA sequencing or even microarray data, and the suitability of these methods for single-cell transcriptomics has not been assessed. We here discuss commonly used normalization approaches and illustrate how these can produce misleading results. Finally, we present alternative approaches and provide recommendations for single-cell RNA sequencing users.}, language = {en}, number = {6}, urldate = {2019-01-17}, journal = {Nature Methods}, author = {Vallejos, Catalina A. and Risso, Davide and Scialdone, Antonio and Dudoit, Sandrine and Marioni, John C.}, month = jun, year = {2017}, pages = {565--571}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IDFSTNYM/Vallejos et al. - 2017 - Normalizing single-cell RNA sequencing data chall.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9PV8R8CQ/nmeth.html:text/html} } @article{wang_gene_2018, title = {Gene expression distribution deconvolution in single-cell {RNA} sequencing}, volume = {115}, copyright = {Copyright © 2018 the Author(s). Published by PNAS.. 
This open access article is distributed under Creative Commons Attribution-NonCommercial-NoDerivatives License 4.0 (CC BY-NC-ND).}, issn = {0027-8424, 1091-6490}, url = {https://www.pnas.org/content/115/28/E6437}, doi = {10.1073/pnas.1721085115}, abstract = {Single-cell RNA sequencing (scRNA-seq) enables the quantification of each gene’s expression distribution across cells, thus allowing the assessment of the dispersion, nonzero fraction, and other aspects of its distribution beyond the mean. These statistical characterizations of the gene expression distribution are critical for understanding expression variation and for selecting marker genes for population heterogeneity. However, scRNA-seq data are noisy, with each cell typically sequenced at low coverage, thus making it difficult to infer properties of the gene expression distribution from raw counts. Based on a reexamination of nine public datasets, we propose a simple technical noise model for scRNA-seq data with unique molecular identifiers (UMI). We develop deconvolution of single-cell expression distribution (DESCEND), a method that deconvolves the true cross-cell gene expression distribution from observed scRNA-seq counts, leading to improved estimates of properties of the distribution such as dispersion and nonzero fraction. DESCEND can adjust for cell-level covariates such as cell size, cell cycle, and batch effects. DESCEND’s noise model and estimation accuracy are further evaluated through comparisons to RNA FISH data, through data splitting and simulations and through its effectiveness in removing known batch effects. 
We demonstrate how DESCEND can clarify and improve downstream analyses such as finding differentially expressed genes, identifying cell types, and selecting differentiation markers.}, language = {en}, number = {28}, urldate = {2019-01-24}, journal = {Proceedings of the National Academy of Sciences}, author = {Wang, Jingshu and Huang, Mo and Torre, Eduardo and Dueck, Hannah and Shaffer, Sydney and Murray, John and Raj, Arjun and Li, Mingyao and Zhang, Nancy R.}, month = jul, year = {2018}, pmid = {29946020}, keywords = {differential expression, Gini coefficient, highly variable genes, RNA sequencing, single-cell transcriptomics}, pages = {E6437--E6446}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/57DNLDK4/Wang et al. - 2018 - Gene expression distribution deconvolution in sing.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/YUYL4KY2/E6437.html:text/html} } @article{kiselev_challenges_2019, title = {Challenges in unsupervised clustering of single-cell {RNA}-seq data}, copyright = {2018 Springer Nature Limited}, issn = {1471-0064}, url = {https://www.nature.com/articles/s41576-018-0088-9}, doi = {10.1038/s41576-018-0088-9}, abstract = {Single-cell RNA sequencing (scRNA-seq) enables transcriptome-based characterization of the constituent cell types within a heterogeneous sample. However, reliable analysis and biological interpretation typically require optimal use of clustering algorithms. This Review discusses the multiple algorithmic options for clustering scRNA-seq data, including various technical, biological and computational considerations.}, language = {En}, urldate = {2019-01-31}, journal = {Nature Reviews Genetics}, author = {Kiselev, Vladimir Yu and Andrews, Tallulah S. and Hemberg, Martin}, month = jan, year = {2019}, pages = {1}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/YIHZU3GQ/Kiselev et al. 
- 2019 - Challenges in unsupervised clustering of single-ce.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BQ9FYZH6/s41576-018-0088-9.html:text/html} } @article{ji_tscan:_2016, title = {{TSCAN}: {Pseudo}-time reconstruction and evaluation in single-cell {RNA}-seq analysis}, volume = {44}, issn = {0305-1048}, shorttitle = {{TSCAN}}, url = {https://academic.oup.com/nar/article/44/13/e117/2457590}, doi = {10.1093/nar/gkw430}, abstract = {Abstract. When analyzing single-cell RNA-seq data, constructing a pseudo-temporal path to order cells based on the gradual transition of their transcriptomes i}, language = {en}, number = {13}, urldate = {2019-02-04}, journal = {Nucleic Acids Research}, author = {Ji, Zhicheng and Ji, Hongkai}, month = jul, year = {2016}, pages = {e117}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/F4AMNTIU/Ji and Ji - 2016 - TSCAN Pseudo-time reconstruction and evaluation i.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QURDXBZ3/2457590.html:text/html} } @article{kumasaka_high-resolution_2019, title = {High-resolution genetic mapping of putative causal interactions between regions of open chromatin}, volume = {51}, copyright = {2018 The Author(s), under exclusive licence to Springer Nature America, Inc.}, issn = {1546-1718}, url = {https://www.nature.com/articles/s41588-018-0278-6}, doi = {10.1038/s41588-018-0278-6}, abstract = {A Bayesian hierarchical approach identifies over 15,000 causal regulatory interactions in the human genome using ATAC-seq data from 100 individuals. The majority of detected interactions were over distances of {\textless}20 kb, a range where 3C methods perform poorly.}, language = {En}, number = {1}, urldate = {2019-03-14}, journal = {Nature Genetics}, author = {Kumasaka, Natsuhiko and Knights, Andrew J.
and Gaffney, Daniel J.}, month = jan, year = {2019}, pages = {128}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/CS8XIVYF/Kumasaka et al. - 2019 - High-resolution genetic mapping of putative causal.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TIPW979S/s41588-018-0278-6.html:text/html} } @article{grossman_positional_2018, title = {Positional specificity of different transcription factor classes within enhancers}, volume = {115}, copyright = {Copyright © 2018 the Author(s). Published by PNAS.. This open access article is distributed under Creative Commons Attribution-NonCommercial-NoDerivatives License 4.0 (CC BY-NC-ND).}, issn = {0027-8424, 1091-6490}, url = {https://www.pnas.org/content/115/30/E7222}, doi = {10.1073/pnas.1804663115}, abstract = {Gene expression is controlled by sequence-specific transcription factors (TFs), which bind to regulatory sequences in DNA. TF binding occurs in nucleosome-depleted regions of DNA (NDRs), which generally encompass regions with lengths similar to those protected by nucleosomes. However, less is known about where within these regions specific TFs tend to be found. Here, we characterize the positional bias of inferred binding sites for 103 TFs within ∼500,000 NDRs across 47 cell types. We find that distinct classes of TFs display different binding preferences: Some tend to have binding sites toward the edges, some toward the center, and some at other positions within the NDR. These patterns are highly consistent across cell types, suggesting that they may reflect TF-specific intrinsic structural or functional characteristics. In particular, TF classes with binding sites at NDR edges are enriched for those known to interact with histones and chromatin remodelers, whereas TFs with central enrichment interact with other TFs and cofactors such as p300. 
Our results suggest distinct regiospecific binding patterns and functions of TF classes within enhancers.}, language = {en}, number = {30}, urldate = {2019-04-01}, journal = {Proceedings of the National Academy of Sciences}, author = {Grossman, Sharon R. and Engreitz, Jesse and Ray, John P. and Nguyen, Tung H. and Hacohen, Nir and Lander, Eric S.}, month = jul, year = {2018}, pmid = {29987030}, keywords = {genomics, chromatin structure, gene regulation, transcription factor binding}, pages = {E7222--E7230}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KFUGBD65/Grossman et al. - 2018 - Positional specificity of different transcription .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SJD8DKDS/E7222.html:text/html} } @article{berest_quantification_2018, title = {Quantification of differential transcription factor activity and multiomics-based classification into activators and repressors: {diffTF}}, shorttitle = {Quantification of differential transcription factor activity and multiomics-based classification into activators and repressors}, url = {http://biorxiv.org/lookup/doi/10.1101/368498}, doi = {10.1101/368498}, abstract = {Transcription factor (TF) activity is an important read-out of cellular signalling pathways and thus to assess regulatory differences across conditions. However, current technologies lack the ability to simultaneously assess activity changes for multiple TFs and in particular to determine whether a specific TF acts globally as transcriptional repressor or activator. To this end, we introduce a widely applicable genome-wide method diffTF to assess differential TF activity and to classify TFs as activator or repressor (available at https://git.embl.de/grp-zaugg/diffTF). This is done by integrating any type of genome-wide chromatin accessibility data with RNA-Seq data and in-silico predicted TF binding sites. 
We corroborated the classification of TFs into repressors and activators by three independent analyses based on enrichments of active/repressive chromatin states, correlation of TF activity with gene expression, and activator- and repressor-specific chromatin footprints. To show the power of diffTF, we present two case studies: First, we applied diffTF in to a large ATAC-Seq/RNA-Seq dataset comparing mutated and unmutated chronic lymphocytic leukemia samples, where we identified dozens of known (40\%) and potentially novel (60\%) TFs that are differentially active. We were also able to classify almost half of them as either repressor and activator. Second, we applied diffTF to a small ATAC-Seq/RNA-Seq data set comparing two cell types along the hematopoietic differentiation trajectory (multipotent progenitors - MPP - versus granulocyte-macrophage progenitors - GMP). Here we identified the known drivers of differentiation and found that the majority of the differentially active TFs are transcriptional activators. Overall, diffTF was able to recover the known TFs in both case studies, additionally identified TFs that have been less well characterized in the given condition, and provides a classification of the TFs into transcriptional activators and repressors.}, language = {en}, urldate = {2019-04-01}, journal = {bioRxiv}, author = {Berest, Ivan and Arnold, Christian and Reyes-Palomares, Armando and Palla, Giovanni and Rasmussen, Kasper Dindler and Helin, Kristian and Zaugg, Judith}, month = dec, year = {2018}, file = {Berest et al. - 2018 - Quantification of differential transcription facto.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/T89PUHUV/Berest et al. 
- 2018 - Quantification of differential transcription facto.pdf:application/pdf} } @article{gonzalez-blas_cistopic:_2019, title = {{cisTopic}: cis-regulatory topic modeling on single-cell {ATAC}-seq data}, volume = {16}, copyright = {2019 The Author(s), under exclusive licence to Springer Nature America, Inc.}, issn = {1548-7105}, shorttitle = {{cisTopic}}, url = {https://www.nature.com/articles/s41592-019-0367-1}, doi = {10.1038/s41592-019-0367-1}, abstract = {As an unsupervised Bayesian framework, cisTopic classifies regions in scATAC-seq data into regulatory topics, which are used for clustering.}, language = {en}, number = {5}, urldate = {2019-05-07}, journal = {Nature Methods}, author = {Bravo González-Blas, Carmen and Minnoye, Liesbeth and Papasokrati, Dafni and Aibar, Sara and Hulselmans, Gert and Christiaens, Valerie and Davie, Kristofer and Wouters, Jasper and Aerts, Stein}, month = may, year = {2019}, pages = {397}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/CILUNPYF/González-Blas et al. - 2019 - cisTopic cis-regulatory topic modeling on single-.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RXHF5XRB/s41592-019-0367-1.html:text/html} } @article{fang_fast_2019, title = {Fast and {Accurate} {Clustering} of {Single} {Cell} {Epigenomes} {Reveals} {Cis}-{Regulatory} {Elements} in {Rare} {Cell} {Types}}, copyright = {© 2019, Posted by Cold Spring Harbor Laboratory. The copyright holder for this pre-print is the author. All rights reserved. The material may not be redistributed, re-used or adapted without the author's permission.}, url = {https://www.biorxiv.org/content/10.1101/615179v1}, doi = {10.1101/615179}, abstract = {Mammalian tissues are composed of highly specialized cell types defined by distinct gene expression patterns. 
Identification of cis-regulatory elements responsible for cell-type specific gene expression is essential for understanding the origin of the cellular diversity. Conventional assays to map cis-elements via open chromatin analysis of primary tissues fail to resolve their cell type specificity and lack the sensitivity to identify cis-elements in rare cell types. Single nucleus analysis of transposase-accessible chromatin (ATAC-seq) can overcome this limitation, but current analysis methods begin with pre-defined genomic regions of accessibility and are therefore biased toward the dominant population of a tissue. Here we report a method, Single Nucleus Analysis Pipeline for ATAC-seq (SnapATAC), that can efficiently dissect cellular heterogeneity in an unbiased manner using single nucleus ATAC-seq datasets and identify candidate regulatory sequences in constituent cell types. We demonstrate that SnapATAC outperforms existing methods in both accuracy and scalability. We further analyze 64,795 single cell chromatin profiles from the secondary motor cortex of mouse brain, creating a chromatin landscape atlas with unprecedent resolution, including over 300,000 candidate cis-regulatory elements in nearly 50 distinct cell populations. These results demonstrate a systematic approach for comprehensive analysis of cis-regulatory sequences in the mammalian genomes.}, language = {en}, urldate = {2019-05-07}, journal = {bioRxiv}, author = {Fang, Rongxin and Preissl, Sebastian and Hou, Xiaomeng and Lucero, Jacinta and Wang, Xinxin and Motamedi, Amir and Shiau, Andrew K. and Mukamel, Eran A. and Zhang, Yanxiao and Behrens, M. Margarita and Ecker, Joseph and Ren, Bing}, month = apr, year = {2019}, pages = {615179}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/T6VU8VIB/Fang et al. 
- 2019 - Fast and Accurate Clustering of Single Cell Epigen.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/JKLJNT7V/615179v1.html:text/html} } @article{li_identification_2019, title = {Identification of transcription factor binding sites using {ATAC}-seq}, volume = {20}, issn = {1474-760X}, url = {https://doi.org/10.1186/s13059-019-1642-2}, doi = {10.1186/s13059-019-1642-2}, abstract = {Transposase-Accessible Chromatin followed by sequencing (ATAC-seq) is a simple protocol for detection of open chromatin. Computational footprinting, the search for regions with depletion of cleavage events due to transcription factor binding, is poorly understood for ATAC-seq. We propose the first footprinting method considering ATAC-seq protocol artifacts. HINT-ATAC uses a position dependency model to learn the cleavage preferences of the transposase. We observe strand-specific cleavage patterns around transcription factor binding sites, which are determined by local nucleosome architecture. By incorporating all these biases, HINT-ATAC is able to significantly outperform competing methods in the prediction of transcription factor binding sites with footprints.}, number = {1}, urldate = {2019-05-21}, journal = {Genome Biology}, author = {Li, Zhijian and Schulz, Marcel H. and Look, Thomas and Begemann, Matthias and Zenke, Martin and Costa, Ivan G.}, month = feb, year = {2019}, pages = {45}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KFNKU6ZM/Li et al. 
- 2019 - Identification of transcription factor binding sit.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/VISSH4XB/s13059-019-1642-2.html:text/html} } @article{vierstra_genomic_2016, title = {Genomic footprinting}, volume = {13}, copyright = {2016 Nature Publishing Group}, issn = {1548-7105}, url = {https://www.nature.com/articles/nmeth.3768}, doi = {10.1038/nmeth.3768}, abstract = {The advent of DNA footprinting with DNase I more than 35 years ago enabled the systematic analysis of protein-DNA interactions, and the technique has been instrumental in the decoding of cis-regulatory elements and the identification and characterization of transcription factors and other DNA-binding proteins. The ability to analyze millions of individual genomic cleavage events via massively parallel sequencing has enabled in vivo DNase I footprinting on a genomic scale, offering the potential for global analysis of transcription factor occupancy in a single experiment. Genomic footprinting has opened unique vistas on the organization, function and evolution of regulatory DNA; however, the technology is still nascent. 
Here we discuss both prospects and challenges of genomic footprinting, as well as considerations for its application to complex genomes.}, language = {en}, number = {3}, urldate = {2019-09-30}, journal = {Nature Methods}, author = {Vierstra, Jeff and Stamatoyannopoulos, John A.}, month = mar, year = {2016}, pages = {213--221}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GTRVMWIS/Vierstra and Stamatoyannopoulos - 2016 - Genomic footprinting.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/F4K3L8EJ/nmeth.html:text/html} } @article{barozzi_co-regulation_2014, title = {Co-regulation of transcription factor binding and nucleosome occupancy through {DNA} features of mammalian enhancers}, volume = {54}, issn = {1097-2765}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4048654/}, doi = {10.1016/j.molcel.2014.04.006}, abstract = {Transcription factors (TFs) preferentially bind sites contained in regions of computationally predicted high nucleosomal occupancy, suggesting that nucleosomes are gatekeepers of TF binding sites. However, because of their complexity mammalian genomes contain millions of randomly occurring, unbound TF consensus binding sites. We hypothesized that the information controlling nucleosome assembly may coincide with the information that enables TFs to bind cis-regulatory elements while ignoring randomly occurring sites. Hence, nucleosome would selectively mask genomic sites contacted by TFs and thus potentially functional. The hematopoietic TF Pu.1 maintained nucleosome depletion at macrophage-specific enhancers that displayed a broad range of nucleosome occupancy in other cell types and in reconstituted chromatin. We identified a minimal set of DNA sequence and shape features that accurately predicted both Pu.1 binding and nucleosome occupancy genome-wide. 
These data reveal a basic organizational principle of mammalian cis-regulatory elements whereby TF recruitment and nucleosome deposition are controlled by overlapping DNA sequence features.}, number = {5}, urldate = {2019-09-30}, journal = {Molecular Cell}, author = {Barozzi, Iros and Simonatto, Marta and Bonifacio, Silvia and Yang, Lin and Rohs, Remo and Ghisletti, Serena and Natoli, Gioacchino}, month = jun, year = {2014}, pmid = {24813947}, pmcid = {PMC4048654}, pages = {844--857}, file = {PubMed Central Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6RTT9G5R/Barozzi et al. - 2014 - Co-regulation of transcription factor binding and .pdf:application/pdf} } @article{adey_rapid_2010, title = {Rapid, low-input, low-bias construction of shotgun fragment libraries by high-density in vitro transposition}, volume = {11}, issn = {1474-760X}, url = {https://doi.org/10.1186/gb-2010-11-12-r119}, doi = {10.1186/gb-2010-11-12-r119}, abstract = {We characterize and extend a highly efficient method for constructing shotgun fragment libraries in which transposase catalyzes in vitro DNA fragmentation and adaptor incorporation simultaneously. We apply this method to sequencing a human genome and find that coverage biases are comparable to those of conventional protocols. We also extend its capabilities by developing protocols for sub-nanogram library construction, exome capture from 50 ng of input DNA, PCR-free and colony PCR library construction, and 96-plex sample indexing.}, number = {12}, urldate = {2019-09-30}, journal = {Genome Biology}, author = {Adey, Andrew and Morrison, Hilary G. and {Asan} and Xun, Xu and Kitzman, Jacob O. and Turner, Emily H. and Stackhouse, Bethany and MacKenzie, Alexandra P. and Caruccio, Nicholas C. and Zhang, Xiuqing and Shendure, Jay}, month = dec, year = {2010}, pages = {R119}, file = {Full Text:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XYZCW65D/Adey et al. 
- 2010 - Rapid, low-input, low-bias construction of shotgun.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ZGDVGS7E/gb-2010-11-12-r119.html:text/html} } @article{klemm_chromatin_2019, title = {Chromatin accessibility and the regulatory epigenome}, volume = {20}, copyright = {2018 Springer Nature Limited}, issn = {1471-0064}, url = {https://www.nature.com/articles/s41576-018-0089-8}, doi = {10.1038/s41576-018-0089-8}, abstract = {Chromatin accessibility comprises the positions, compaction and dynamics of nucleosomes, as well as the occupancy of DNA by other proteins such as transcription factors. In this Review, the authors discuss diverse methods for characterizing chromatin accessibility, how accessibility is determined and remodelled in cells and the regulatory roles of accessibility in gene expression and development.}, language = {en}, number = {4}, urldate = {2019-10-01}, journal = {Nature Reviews Genetics}, author = {Klemm, Sandy L. and Shipony, Zohar and Greenleaf, William J.}, month = apr, year = {2019}, pages = {207--220}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7M52REY6/Klemm et al. - 2019 - Chromatin accessibility and the regulatory epigeno.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4Q5BBBWP/s41576-018-0089-8.html:text/html} } @article{groux_spar-k:_2019, title = {{SPar}-{K}: a method to partition {NGS} signal data}, shorttitle = {{SPar}-{K}}, url = {https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btz416/5497248}, doi = {10.1093/bioinformatics/btz416}, abstract = {AbstractSummary. 
We present SPar-K (Signal Partitioning with K-means), a method to search for archetypical chromatin architectures by partitioning a set of gen}, language = {en}, urldate = {2019-10-01}, journal = {Bioinformatics}, author = {Groux, Romain and Bucher, Philipp}, month = may, year = {2019}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MB4BIGBN/Groux and Bucher - SPar-K a method to partition NGS signal data.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5FDIJ676/5497248.html:text/html;supplemental.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MB4BIGBN/supplemental.pdf:application/pdf} } @article{ou_motifstack_2018, title = {{motifStack} for the analysis of transcription factor binding site evolution}, volume = {15}, copyright = {2018 Nature Publishing Group}, issn = {1548-7105}, url = {https://www.nature.com/articles/nmeth.4555}, doi = {10.1038/nmeth.4555}, abstract = {motifStack for the analysis of transcription factor binding site evolution}, language = {en}, number = {1}, urldate = {2019-10-09}, journal = {Nature Methods}, author = {Ou, Jianhong and Wolfe, Scot A. and Brodsky, Michael H. and Zhu, Lihua Julie}, month = jan, year = {2018}, pages = {8--9}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8H25FY5M/Ou et al. - 2018 - motifStack for the analysis of transcription facto.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TNN9838Q/nmeth.html:text/html} } @article{langmead_ultrafast_2009, title = {Ultrafast and memory-efficient alignment of short {DNA} sequences to the human genome}, volume = {10}, issn = {1474-760X}, url = {https://doi.org/10.1186/gb-2009-10-3-r25}, doi = {10.1186/gb-2009-10-3-r25}, abstract = {Bowtie is an ultrafast, memory-efficient alignment program for aligning short DNA sequence reads to large genomes. 
For the human genome, Burrows-Wheeler indexing allows Bowtie to align more than 25 million reads per CPU hour with a memory footprint of approximately 1.3 gigabytes. Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware backtracking algorithm that permits mismatches. Multiple processor cores can be used simultaneously to achieve even greater alignment speeds. Bowtie is open source http://bowtie.cbcb.umd.edu.}, number = {3}, urldate = {2019-10-10}, journal = {Genome Biology}, author = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven L.}, month = mar, year = {2009}, pages = {R25}, file = {Full Text:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GV3FPQW4/Langmead et al. - 2009 - Ultrafast and memory-efficient alignment of short .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/LZ63RT6D/gb-2009-10-3-r25.html:text/html} } @article{jolma_multiplexed_2010-1, title = {Multiplexed massively parallel {SELEX} for characterization of human transcription factor binding specificities}, volume = {20}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/20/6/861}, doi = {10.1101/gr.100552.109}, abstract = {The genetic code—the binding specificity of all transfer-RNAs—defines how protein primary structure is determined by DNA sequence. DNA also dictates when and where proteins are expressed, and this information is encoded in a pattern of specific sequence motifs that are recognized by transcription factors. However, the DNA-binding specificity is only known for a small fraction of the ∼1400 human transcription factors (TFs). We describe here a high-throughput method for analyzing transcription factor binding specificity that is based on systematic evolution of ligands by exponential enrichment (SELEX) and massively parallel sequencing. 
The method is optimized for analysis of large numbers of TFs in parallel through the use of affinity-tagged proteins, barcoded selection oligonucleotides, and multiplexed sequencing. Data are analyzed by a new bioinformatic platform that uses the hundreds of thousands of sequencing reads obtained to control the quality of the experiments and to generate binding motifs for the TFs. The described technology allows higher throughput and identification of much longer binding profiles than current microarray-based methods. In addition, as our method is based on proteins expressed in mammalian cells, it can also be used to characterize DNA-binding preferences of full-length proteins or proteins requiring post-translational modifications. We validate the method by determining binding specificities of 14 different classes of TFs and by confirming the specificities for NFATC1 and RFX3 using ChIP-seq. Our results reveal unexpected dimeric modes of binding for several factors that were thought to preferentially bind DNA as monomers.}, language = {en}, number = {6}, urldate = {2019-10-14}, journal = {Genome Research}, author = {Jolma, Arttu and Kivioja, Teemu and Toivonen, Jarkko and Cheng, Lu and Wei, Gonghong and Enge, Martin and Taipale, Mikko and Vaquerizas, Juan M. and Yan, Jian and Sillanpää, Mikko J. and Bonke, Martin and Palin, Kimmo and Talukder, Shaheynoor and Hughes, Timothy R. and Luscombe, Nicholas M. and Ukkonen, Esko and Taipale, Jussi}, month = jun, year = {2010}, pmid = {20378718}, pages = {861--873}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UQHZIZKE/Jolma et al. 
- 2010 - Multiplexed massively parallel SELEX for character.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/WKC3TTSA/861.html:text/html} } @article{newburger_uniprobe:_2009, title = {{UniPROBE}: an online database of protein binding microarray data on protein–{DNA} interactions}, volume = {37}, issn = {0305-1048}, shorttitle = {{UniPROBE}}, url = {https://academic.oup.com/nar/article/37/suppl_1/D77/1002702}, doi = {10.1093/nar/gkn660}, abstract = {Abstract. The UniPROBE (Universal PBM Resource for Oligonucleotide Binding Evaluation) database hosts data generated by universal protein binding microarray (P}, language = {en}, number = {suppl\_1}, urldate = {2019-10-15}, journal = {Nucleic Acids Research}, author = {Newburger, Daniel E. and Bulyk, Martha L.}, month = jan, year = {2009}, pages = {D77--D82}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IVHMVAC8/Newburger and Bulyk - 2009 - UniPROBE an online database of protein binding mi.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/VH3GBGI6/1002702.html:text/html} } @misc{farr_softbank-backed_2018, title = {{SoftBank}-backed 10x {Genomics} makes a clever buy to help unlock the human biological code}, url = {https://www.cnbc.com/2018/08/28/sofbtank-backed-10x-genomics-buys-epinomics.html}, abstract = {10x Genomics is one of the best-funded private companies involved in the field of genomics, or unlocking the human biological code to improve health. 
Now it's bought Epinomics in the super-hot field of epigenetics, to help it stay ahead of the game.}, language = {en}, urldate = {2019-10-18}, journal = {CNBC}, author = {Farr, Christina}, month = aug, year = {2018}, file = {Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3ANUEE2F/sofbtank-backed-10x-genomics-buys-epinomics.html:text/html} } @misc{hepler_10x_2018, title = {10x {Genomics} takes gene imaging and analysis tools to the big leagues}, url = {https://www.bizjournals.com/sanfrancisco/news/2018/10/30/fast-100-2018-10x-genomics-gene-imaging-tools.html}, abstract = {Since first commercializing its DNA sequencing technology in 2015, 10x Genomics has sold an ever-expanding range of advanced imaging and analysis tools to academic, government and drug development labs, many able to isolate disease pathogens or other biological materials down to the level of individual cells.}, urldate = {2019-10-18}, journal = {San Francisco Business Times}, author = {Hepler, Lauren}, month = oct, year = {2018}, file = {Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/SHTIQ6A8/fast-100-2018-10x-genomics-gene-imaging-tools.html:text/html} } @article{fu_motifviz:_2004, title = {{MotifViz}: an analysis and visualization tool for motif discovery}, volume = {32}, issn = {0305-1048}, shorttitle = {{MotifViz}}, url = {https://academic.oup.com/nar/article/32/suppl_2/W420/1040632}, doi = {10.1093/nar/gkh426}, abstract = {Abstract. Detecting overrepresented known transcription factor binding motifs in a set of promoter sequences of co-regulated genes has become an important appr}, language = {en}, number = {suppl\_2}, urldate = {2019-10-21}, journal = {Nucleic Acids Research}, author = {Fu, Yutao and Frith, Martin C. and Haverty, Peter M. and Weng, Zhiping}, month = jul, year = {2004}, pages = {W420--W423}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/46DY4PV6/Fu et al. 
- 2004 - MotifViz an analysis and visualization tool for m.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XQW6XYP2/1040632.html:text/html} } @article{schones_statistical_2007, title = {Statistical significance of cis-regulatory modules}, volume = {8}, issn = {1471-2105}, url = {https://doi.org/10.1186/1471-2105-8-19}, doi = {10.1186/1471-2105-8-19}, abstract = {It is becoming increasingly important for researchers to be able to scan through large genomic regions for transcription factor binding sites or clusters of binding sites forming cis-regulatory modules. Correspondingly, there has been a push to develop algorithms for the rapid detection and assessment of cis-regulatory modules. While various algorithms for this purpose have been introduced, most are not well suited for rapid, genome scale scanning.}, number = {1}, urldate = {2019-10-21}, journal = {BMC Bioinformatics}, author = {Schones, Dustin E. and Smith, Andrew D. and Zhang, Michael Q.}, month = jan, year = {2007}, pages = {19}, file = {Full Text:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KNIXJDB7/Schones et al. - 2007 - Statistical significance of cis-regulatory modules.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/2DYWQFG3/1471-2105-8-19.html:text/html} } @article{zhao_tred:_2005, title = {{TRED}: a {Transcriptional} {Regulatory} {Element} {Database} and a platform for in silico gene regulation studies}, volume = {33}, issn = {0305-1048}, shorttitle = {{TRED}}, url = {https://academic.oup.com/nar/article/33/suppl_1/D103/2505195}, doi = {10.1093/nar/gki004}, abstract = {Abstract. In order to understand gene regulation, accurate and comprehensive knowledge of transcriptional regulatory elements is essential. 
Here, we report ou}, language = {en}, number = {suppl\_1}, urldate = {2019-10-21}, journal = {Nucleic Acids Research}, author = {Zhao, Fang and Xuan, Zhenyu and Liu, Lihua and Zhang, Michael Q.}, month = jan, year = {2005}, pages = {D103--D107}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/G6MVKQNX/Zhao et al. - 2005 - TRED a Transcriptional Regulatory Element Databas.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5J79Z4Z9/2505195.html:text/html} } @article{aerts_toucan:_2003, title = {Toucan: deciphering the cis-regulatory logic of coregulated genes}, volume = {31}, issn = {0305-1048}, shorttitle = {Toucan}, url = {https://academic.oup.com/nar/article/31/6/1753/2380612}, doi = {10.1093/nar/gkg268}, abstract = {Abstract. TOUCAN is a Java application for the rapid discovery of significant cis-regulatory elements from sets of coexpressed or coregulated genes. Biologis}, language = {en}, number = {6}, urldate = {2019-10-21}, journal = {Nucleic Acids Research}, author = {Aerts, Stein and Thijs, Gert and Coessens, Bert and Staes, Mik and Moreau, Yves and De Moor, Bart}, month = mar, year = {2003}, pages = {1753--1764}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/65QL2XU9/Aerts et al. 
- 2003 - Toucan deciphering the cis ‐regulatory logic of c.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/LZPNVTTI/2380612.html:text/html} } @article{turatsinze_using_2008, title = {Using {RSAT} to scan genome sequences for transcription factor binding sites and cis-regulatory modules}, volume = {3}, copyright = {2008 Nature Publishing Group}, issn = {1750-2799}, url = {https://www.nature.com/articles/nprot.2008.97}, doi = {10.1038/nprot.2008.97}, abstract = {This protocol shows how to detect putative cis-regulatory elements and regions enriched in such elements with the regulatory sequence analysis tools (RSAT) web server ( http://rsat.ulb.ac.be/rsat/ ). The approach applies to known transcription factors, whose binding specificity is represented by position-specific scoring matrices, using the program matrix-scan. The detection of individual binding sites is known to return many false predictions. However, results can be strongly improved by estimating P value, and by searching for combinations of sites (homotypic and heterotypic models). We illustrate the detection of sites and enriched regions with a study case, the upstream sequence of the Drosophila melanogaster gene even-skipped. This protocol is also tested on random control sequences to evaluate the reliability of the predictions. Each task requires a few minutes of computation time on the server. The complete protocol can be executed in about one hour.}, language = {en}, number = {10}, urldate = {2019-10-21}, journal = {Nature Protocols}, author = {Turatsinze, Jean-Valery and Thomas-Chollier, Morgane and Defrance, Matthieu and van Helden, Jacques}, month = oct, year = {2008}, pages = {1578--1588}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HQVBEECI/Turatsinze et al. 
- 2008 - Using RSAT to scan genome sequences for transcript.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QGYVMNME/nprot.2008.html:text/html} } @article{ambrosini_chip-seq_2016-1, title = {The {ChIP}-{Seq} tools and web server: a resource for analyzing {ChIP}-seq and other types of genomic data}, volume = {17}, issn = {1471-2164}, shorttitle = {The {ChIP}-{Seq} tools and web server}, url = {https://doi.org/10.1186/s12864-016-3288-8}, doi = {10.1186/s12864-016-3288-8}, abstract = {ChIP-seq and related high-throughput chromatin profilig assays generate ever increasing volumes of highly valuable biological data. To make sense out of it, biologists need versatile, efficient and user-friendly tools for access, visualization and itegrative analysis of such data.}, number = {1}, urldate = {2019-10-22}, journal = {BMC Genomics}, author = {Ambrosini, Giovanna and Dreos, René and Kumar, Sunil and Bucher, Philipp}, month = nov, year = {2016}, pages = {938}, file = {Full Text:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/YMDLG4CD/Ambrosini et al. - 2016 - The ChIP-Seq tools and web server a resource for .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/EBDZ5T6H/s12864-016-3288-8.html:text/html} } @article{pollard_detection_2010, title = {Detection of nonneutral substitution rates on mammalian phylogenies}, volume = {20}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/20/1/110}, doi = {10.1101/gr.097857.109}, abstract = {Methods for detecting nucleotide substitution rates that are faster or slower than expected under neutral drift are widely used to identify candidate functional elements in genomic sequences. However, most existing methods consider either reductions (conservation) or increases (acceleration) in rate but not both, or assume that selection acts uniformly across the branches of a phylogeny. 
Here we examine the more general problem of detecting departures from the neutral rate of substitution in either direction, possibly in a clade-specific manner. We consider four statistical, phylogenetic tests for addressing this problem: a likelihood ratio test, a score test, a test based on exact distributions of numbers of substitutions, and the genomic evolutionary rate profiling (GERP) test. All four tests have been implemented in a freely available program called phyloP. Based on extensive simulation experiments, these tests are remarkably similar in statistical power. With 36 mammalian species, they all appear to be capable of fairly good sensitivity with low false-positive rates in detecting strong selection at individual nucleotides, moderate selection in 3-bp elements, and weaker or clade-specific selection in longer elements. By applying phyloP to mammalian multiple alignments from the ENCODE project, we shed light on patterns of conservation/acceleration in known and predicted functional elements, approximate fractions of sites subject to constraint, and differences in clade-specific selection in the primate and glires clades. We also describe new “Conservation” tracks in the UCSC Genome Browser that display both phyloP and phastCons scores for genome-wide alignments of 44 vertebrate species.}, language = {en}, number = {1}, urldate = {2019-10-22}, journal = {Genome Research}, author = {Pollard, Katherine S. and Hubisz, Melissa J. and Rosenbloom, Kate R. and Siepel, Adam}, month = jan, year = {2010}, pmid = {19858363}, pages = {110--121}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4EC5QHN3/Pollard et al. 
- 2010 - Detection of nonneutral substitution rates on mamm.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9A48EMZC/110.html:text/html} } @article{siepel_evolutionarily_2005, title = {Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes}, volume = {15}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/15/8/1034}, doi = {10.1101/gr.3715005}, abstract = {We have conducted a comprehensive search for conserved elements in vertebrate genomes, using genome-wide multiple alignments of five vertebrate species (human, mouse, rat, chicken, and Fugu rubripes). Parallel searches have been performed with multiple alignments of four insect species (three species of Drosophila and Anopheles gambiae), two species of Caenorhabditis, and seven species of Saccharomyces. Conserved elements were identified with a computer program called phastCons, which is based on a two-state phylogenetic hidden Markov model (phylo-HMM). PhastCons works by fitting a phylo-HMM to the data by maximum likelihood, subject to constraints designed to calibrate the model across species groups, and then predicting conserved elements based on this model. The predicted elements cover roughly 3\%–8\% of the human genome (depending on the details of the calibration procedure) and substantially higher fractions of the more compact Drosophila melanogaster (37\%–53\%), Caenorhabditis elegans (18\%–37\%), and Saccharaomyces cerevisiae (47\%–68\%) genomes. From yeasts to vertebrates, in order of increasing genome size and general biological complexity, increasing fractions of conserved bases are found to lie outside of the exons of known protein-coding genes. In all groups, the most highly conserved elements (HCEs), by log-odds score, are hundreds or thousands of bases long. 
These elements share certain properties with ultraconserved elements, but they tend to be longer and less perfectly conserved, and they overlap genes of somewhat different functional categories. In vertebrates, HCEs are associated with the 3′ UTRs of regulatory genes, stable gene deserts, and megabase-sized regions rich in moderately conserved noncoding sequences. Noncoding HCEs also show strong statistical evidence of an enrichment for RNA secondary structure.}, language = {en}, number = {8}, urldate = {2019-10-22}, journal = {Genome Research}, author = {Siepel, Adam and Bejerano, Gill and Pedersen, Jakob S. and Hinrichs, Angie S. and Hou, Minmei and Rosenbloom, Kate and Clawson, Hiram and Spieth, John and Hillier, LaDeana W. and Richards, Stephen and Weinstock, George M. and Wilson, Richard K. and Gibbs, Richard A. and Kent, W. James and Miller, Webb and Haussler, David}, month = aug, year = {2005}, pmid = {16024819}, pages = {1034--1050}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HGVX3DX7/Siepel et al. - 2005 - Evolutionarily conserved elements in vertebrate, i.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/JGN9UP7F/1034.html:text/html} } @article{barrett_ncbi_2011, title = {{NCBI} {GEO}: archive for functional genomics data sets—10 years on}, volume = {39}, issn = {0305-1048}, shorttitle = {{NCBI} {GEO}}, url = {https://academic.oup.com/nar/article/39/suppl_1/D1005/2507594}, doi = {10.1093/nar/gkq1184}, abstract = {Abstract. A decade ago, the Gene Expression Omnibus (GEO) database was established at the National Center for Biotechnology Information (NCBI). The original o}, language = {en}, number = {suppl\_1}, urldate = {2019-10-22}, journal = {Nucleic Acids Research}, author = {Barrett, Tanya and Troup, Dennis B. and Wilhite, Stephen E. and Ledoux, Pierre and Evangelista, Carlos and Kim, Irene F. and Tomashevsky, Maxim and Marshall, Kimberly A. and Phillippy, Katherine H. 
and Sherman, Patti M. and Muertter, Rolf N. and Holko, Michelle and Ayanbule, Oluwabukunmi and Yefanov, Andrey and Soboleva, Alexandra}, month = jan, year = {2011}, pages = {D1005--D1010}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/P285SMU2/Barrett et al. - 2011 - NCBI GEO archive for functional genomics data set.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/MBQG6ITJ/2507594.html:text/html} } @article{rustici_arrayexpress_2013, title = {{ArrayExpress} update—trends in database growth and links to data analysis tools}, volume = {41}, issn = {0305-1048}, url = {https://academic.oup.com/nar/article/41/D1/D987/1066874}, doi = {10.1093/nar/gks1174}, abstract = {Abstract. The ArrayExpress Archive of Functional Genomics Data (http://www.ebi.ac.uk/arrayexpress) is one of three international functional genomics public dat}, language = {en}, number = {D1}, urldate = {2019-10-22}, journal = {Nucleic Acids Research}, author = {Rustici, Gabriella and Kolesnikov, Nikolay and Brandizi, Marco and Burdett, Tony and Dylag, Miroslaw and Emam, Ibrahim and Farne, Anna and Hastings, Emma and Ison, Jon and Keays, Maria and Kurbatova, Natalja and Malone, James and Mani, Roby and Mupo, Annalisa and Pedro Pereira, Rui and Pilicheva, Ekaterina and Rung, Johan and Sharma, Anjan and Tang, Y. Amy and Ternent, Tobias and Tikhonov, Andrew and Welter, Danielle and Williams, Eleanor and Brazma, Alvis and Parkinson, Helen and Sarkans, Ugis}, month = jan, year = {2013}, pages = {D987--D990}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/I5XQXQMN/Rustici et al. 
- 2013 - ArrayExpress update—trends in database growth and .pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3KBAA7YZ/1066874.html:text/html} } @article{roadmap_epigenomics_consortium_integrative_2015, title = {Integrative analysis of 111 reference human epigenomes}, volume = {518}, copyright = {2015 Nature Publishing Group}, issn = {1476-4687}, url = {https://www.nature.com/articles/nature14248}, doi = {10.1038/nature14248}, abstract = {The reference human genome sequence set the stage for studies of genetic variation and its association with human disease, but epigenomic studies lack a similar reference. To address this need, the NIH Roadmap Epigenomics Consortium generated the largest collection so far of human epigenomes for primary cells and tissues. Here we describe the integrative analysis of 111 reference human epigenomes generated as part of the programme, profiled for histone modification patterns, DNA accessibility, DNA methylation and RNA expression. We establish global maps of regulatory elements, define regulatory modules of coordinated activity, and their likely activators and repressors. We show that disease- and trait-associated genetic variants are enriched in tissue-specific epigenomic marks, revealing biologically relevant cell types for diverse human traits, and providing a resource for interpreting the molecular basis of human disease. Our results demonstrate the central role of epigenomic information for understanding gene regulation, cellular differentiation and human disease.}, language = {en}, number = {7539}, urldate = {2019-10-22}, journal = {Nature}, author = {{Roadmap Epigenomics Consortium} and Kundaje, Anshul and Meuleman, Wouter and Ernst, Jason and Bilenky, Misha and Yen, Angela and Heravi-Moussavi, Alireza and Kheradpour, Pouya and Zhang, Zhizhuo and Wang, Jianrong and Ziller, Michael J. and Amin, Viren and Whitaker, John W. and Schultz, Matthew D. and Ward, Lucas D. 
and Sarkar, Abhishek and Quon, Gerald and Sandstrom, Richard S. and Eaton, Matthew L. and Wu, Yi-Chieh and Pfenning, Andreas R. and Wang, Xinchen and Claussnitzer, Melina and Liu, Yaping and Coarfa, Cristian and Harris, R. Alan and Shoresh, Noam and Epstein, Charles B. and Gjoneska, Elizabeta and Leung, Danny and Xie, Wei and Hawkins, R. David and Lister, Ryan and Hong, Chibo and Gascard, Philippe and Mungall, Andrew J. and Moore, Richard and Chuah, Eric and Tam, Angela and Canfield, Theresa K. and Hansen, R. Scott and Kaul, Rajinder and Sabo, Peter J. and Bansal, Mukul S. and Carles, Annaick and Dixon, Jesse R. and Farh, Kai-How and Feizi, Soheil and Karlic, Rosa and Kim, Ah-Ram and Kulkarni, Ashwinikumar and Li, Daofeng and Lowdon, Rebecca and Elliott, GiNell and Mercer, Tim R. and Neph, Shane J. and Onuchic, Vitor and Polak, Paz and Rajagopal, Nisha and Ray, Pradipta and Sallari, Richard C. and Siebenthall, Kyle T. and Sinnott-Armstrong, Nicholas A. and Stevens, Michael and Thurman, Robert E. and Wu, Jie and Zhang, Bo and Zhou, Xin and Beaudet, Arthur E. and Boyer, Laurie A. and De Jager, Philip L. and Farnham, Peggy J. and Fisher, Susan J. and Haussler, David and Jones, Steven J. M. and Li, Wei and Marra, Marco A. and McManus, Michael T. and Sunyaev, Shamil and Thomson, James A. and Tlsty, Thea D. and Tsai, Li-Huei and Wang, Wei and Waterland, Robert A. and Zhang, Michael Q. and Chadwick, Lisa H. and Bernstein, Bradley E. and Costello, Joseph F. and Ecker, Joseph R. and Hirst, Martin and Meissner, Alexander and Milosavljevic, Aleksandar and Ren, Bing and Stamatoyannopoulos, John A. and Wang, Ting and Kellis, Manolis}, month = feb, year = {2015}, pages = {317--330}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RKEQPY7V/Roadmap Epigenomics Consortium et al. 
- 2015 - Integrative analysis of 111 reference human epigen.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/7LD5I44Y/nature14248.html:text/html} } @article{lizio_gateways_2015, title = {Gateways to the {FANTOM}5 promoter level mammalian expression atlas}, volume = {16}, issn = {1465-6906}, url = {https://doi.org/10.1186/s13059-014-0560-6}, doi = {10.1186/s13059-014-0560-6}, abstract = {The FANTOM5 project investigates transcription initiation activities in more than 1,000 human and mouse primary cells, cell lines and tissues using CAGE. Based on manual curation of sample information and development of an ontology for sample classification, we assemble the resulting data into a centralized data resource (http://fantom.gsc.riken.jp/5/). This resource contains web-based tools and data-access points for the research community to search and extract data related to samples, genes, promoter activities, transcription factors and enhancers across the FANTOM5 atlas.}, number = {1}, urldate = {2019-10-22}, journal = {Genome Biology}, author = {Lizio, Marina and Harshbarger, Jayson and Shimoji, Hisashi and Severin, Jessica and Kasukawa, Takeya and Sahin, Serkan and Abugessaisa, Imad and Fukuda, Shiro and Hori, Fumi and Ishikawa-Kato, Sachi and Mungall, Christopher J. and Arner, Erik and Baillie, J. Kenneth and Bertin, Nicolas and Bono, Hidemasa and de Hoon, Michiel and Diehl, Alexander D. and Dimont, Emmanuel and Freeman, Tom C. and Fujieda, Kaori and Hide, Winston and Kaliyaperumal, Rajaram and Katayama, Toshiaki and Lassmann, Timo and Meehan, Terrence F. and Nishikata, Koro and Ono, Hiromasa and Rehli, Michael and Sandelin, Albin and Schultes, Erik A. and 't Hoen, Peter A. C. and Tatum, Zuotian and Thompson, Mark and Toyoda, Tetsuro and Wright, Derek W. and Daub, Carsten O. 
and Itoh, Masayoshi and Carninci, Piero and Hayashizaki, Yoshihide and Forrest, Alistair RR and Kawaji, Hideya and {the FANTOM consortium}}, month = jan, year = {2015}, pages = {22}, file = {Full Text:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/3V4NS8ZW/Lizio et al. - 2015 - Gateways to the FANTOM5 promoter level mammalian e.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BCB959CN/s13059-014-0560-6.html:text/html} } @article{langmead_fast_2012, title = {Fast gapped-read alignment with {Bowtie} 2}, volume = {9}, copyright = {2012 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1548-7105}, url = {https://www.nature.com/articles/nmeth.1923}, doi = {10.1038/nmeth.1923}, abstract = {The Bowtie 2 software achieves fast, sensitive, accurate and memory-efficient gapped alignment of sequencing reads using the full-text minute index and hardware-accelerated dynamic programming algorithms.}, language = {en}, number = {4}, urldate = {2019-10-22}, journal = {Nature Methods}, author = {Langmead, Ben and Salzberg, Steven L.}, month = apr, year = {2012}, pages = {357--359}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UXJPBHQG/Langmead and Salzberg - 2012 - Fast gapped-read alignment with Bowtie 2.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/4249ERH7/nmeth.html:text/html} } @article{ambrosini_signal_2003, title = {Signal search analysis server}, volume = {31}, issn = {0305-1048}, url = {https://academic.oup.com/nar/article/31/13/3618/2904233}, doi = {10.1093/nar/gkg611}, abstract = {Abstract. 
Signal search analysis is a general method to discover and characterize sequence motifs that are positionally correlated with a functional site (e.g.}, language = {en}, number = {13}, urldate = {2019-10-22}, journal = {Nucleic Acids Research}, author = {Ambrosini, Giovanna and Praz, Viviane and Jagannathan, Vidhya and Bucher, Philipp}, month = jul, year = {2003}, pages = {3618--3620}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IINV3SZZ/Ambrosini et al. - 2003 - Signal search analysis server.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ELWD6TWU/2904233.html:text/html} } @article{quinlan_bedtools:_2010, title = {{BEDTools}: a flexible suite of utilities for comparing genomic features}, volume = {26}, issn = {1367-4803}, shorttitle = {{BEDTools}}, url = {https://academic.oup.com/bioinformatics/article/26/6/841/244688}, doi = {10.1093/bioinformatics/btq033}, abstract = {Abstract. Motivation: Testing for correlations between different sets of genomic features is a fundamental task in genomics research. However, searching for ov}, language = {en}, number = {6}, urldate = {2019-10-22}, journal = {Bioinformatics}, author = {Quinlan, Aaron R. and Hall, Ira M.}, month = mar, year = {2010}, pages = {841--842}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/UBRVAYEZ/Quinlan and Hall - 2010 - BEDTools a flexible suite of utilities for compar.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/XWQUFD34/244688.html:text/html} } @article{li_sequence_2009, title = {The {Sequence} {Alignment}/{Map} format and {SAMtools}}, volume = {25}, issn = {1367-4803}, url = {https://academic.oup.com/bioinformatics/article/25/16/2078/204688}, doi = {10.1093/bioinformatics/btp352}, abstract = {Abstract. 
Summary: The Sequence Alignment/Map (SAM) format is a generic alignment format for storing read alignments against reference sequences, supporting sh}, language = {en}, number = {16}, urldate = {2019-10-22}, journal = {Bioinformatics}, author = {Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin, Richard}, month = aug, year = {2009}, pages = {2078--2079}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/389A3AHF/Li et al. - 2009 - The Sequence AlignmentMap format and SAMtools.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Y2BHY5YE/204688.html:text/html} } @article{pizzi_fast_2008, series = {{SAIL} – {String} {Algorithms}, {Information} and {Learning}: {Dedicated} to {Professor} {Alberto} {Apostolico} on the occasion of his 60th birthday}, title = {Fast profile matching algorithms — {A} survey}, volume = {395}, issn = {0304-3975}, url = {http://www.sciencedirect.com/science/article/pii/S0304397508000327}, doi = {10.1016/j.tcs.2008.01.015}, abstract = {Position-specific scoring matrices are a popular choice for modelling signals or motifs in biological sequences, both in DNA and protein contexts. A lot of effort has been dedicated to the definition of suitable scores and thresholds for increasing the specificity of the model and the sensitivity of the search. It is quite surprising that, until very recently, little attention has been paid to the actual process of finding the matches of the matrices in a set of sequences, once the score and the threshold have been fixed. In fact, most profile matching tools still rely on a simple sliding window approach to scan the input sequences. This can be a very time expensive routine when searching for hits of a large set of scoring matrices in a sequence database. 
In this paper we will give a survey of proposed approaches to speed up profile matching based on statistical significance, multipattern matching, filtering, indexing data structures, matrix partitioning, Fast Fourier Transform and data compression. These approaches improve the expected searching time of profile matching, thus leading to implementation of faster tools in practice.}, language = {en}, number = {2}, urldate = {2019-10-22}, journal = {Theoretical Computer Science}, author = {Pizzi, Cinzia and Ukkonen, Esko}, month = may, year = {2008}, keywords = {Algorithms, Computational complexity, Position-specific scoring matrix, Profile matching, PSSM}, pages = {137--157}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ACCY6HIU/Pizzi and Ukkonen - 2008 - Fast profile matching algorithms — A survey.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/VA9HWTNL/S0304397508000327.html:text/html} } @article{dreos_eukaryotic_2017-1, title = {The eukaryotic promoter database in its 30th year: focus on non-vertebrate organisms}, volume = {45}, issn = {0305-1048}, shorttitle = {The eukaryotic promoter database in its 30th year}, url = {https://academic.oup.com/nar/article/45/D1/D51/2605665}, doi = {10.1093/nar/gkw1069}, abstract = {Abstract. We present an update of the Eukaryotic Promoter Database EPD (http://epd.vital-it.ch), more specifically on the EPDnew division, which contains compr}, language = {en}, number = {D1}, urldate = {2019-10-22}, journal = {Nucleic Acids Research}, author = {Dreos, René and Ambrosini, Giovanna and Groux, Romain and Cavin Périer, Rouaïda and Bucher, Philipp}, month = jan, year = {2017}, pages = {D51--D55}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KUF6W292/Dreos et al. 
- 2017 - The eukaryotic promoter database in its 30th year.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/RSNETHL7/2605665.html:text/html} } @article{bucher_compilation_1986, title = {Compilation and analysis of eukaryotic {POL} {II} promoter sequences}, volume = {14}, issn = {0305-1048}, url = {https://academic.oup.com/nar/article/14/24/10009/1454023}, doi = {10.1093/nar/14.24.10009}, abstract = {Abstract. A representative set of 168 eukaryotic POL II promoters has been compiled from the EMBL library and subjected to computer signal search analysis. App}, language = {en}, number = {24}, urldate = {2019-10-23}, journal = {Nucleic Acids Research}, author = {Bucher, Philipp and Trifonov, Edward N.}, month = dec, year = {1986}, pages = {10009--10026}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/8Q4JIJLB/Bucher and Trifonov - 1986 - Compilation and analysis of eukaryotic POL II prom.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/BKP7BTWF/1454023.html:text/html} } @article{dreos_epd_2013, title = {{EPD} and {EPDnew}, high-quality promoter resources in the next-generation sequencing era}, volume = {41}, issn = {0305-1048}, url = {https://academic.oup.com/nar/article/41/D1/D157/1070274}, doi = {10.1093/nar/gks1233}, abstract = {Abstract. The Eukaryotic Promoter Database (EPD), available online at http://epd.vital-it.ch, is a collection of experimentally defined eukaryotic POL II promo}, language = {en}, number = {D1}, urldate = {2019-10-23}, journal = {Nucleic Acids Research}, author = {Dreos, René and Ambrosini, Giovanna and Cavin Périer, Rouayda and Bucher, Philipp}, month = jan, year = {2013}, pages = {D157--D164}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9E8E3M9K/Dreos et al. 
- 2013 - EPD and EPDnew, high-quality promoter resources in.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/S2PMUE3K/1070274.html:text/html} } @article{dreos_eukaryotic_2015, title = {The {Eukaryotic} {Promoter} {Database}: expansion of {EPDnew} and new promoter analysis tools}, volume = {43}, issn = {0305-1048}, shorttitle = {The {Eukaryotic} {Promoter} {Database}}, url = {https://academic.oup.com/nar/article/43/D1/D92/2437610}, doi = {10.1093/nar/gku1111}, abstract = {Abstract. We present an update of EPDNew (http://epd.vital-it.ch), a recently introduced new part of the Eukaryotic Promoter Database (EPD) which has been desc}, language = {en}, number = {D1}, urldate = {2019-10-23}, journal = {Nucleic Acids Research}, author = {Dreos, René and Ambrosini, Giovanna and Périer, Rouayda Cavin and Bucher, Philipp}, month = jan, year = {2015}, pages = {D92--D96}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/U86EHUSS/Dreos et al. - 2015 - The Eukaryotic Promoter Database expansion of EPD.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6DX5KZ75/2437610.html:text/html} } @article{raney_track_2014, title = {Track data hubs enable visualization of user-defined genome-wide annotations on the {UCSC} {Genome} {Browser}}, volume = {30}, issn = {1367-4803}, url = {https://academic.oup.com/bioinformatics/article/30/7/1003/232409}, doi = {10.1093/bioinformatics/btt637}, abstract = {Abstract. Summary: Track data hubs provide an efficient mechanism for visualizing remotely hosted Internet-accessible collections of genome annotations. Hub da}, language = {en}, number = {7}, urldate = {2019-10-23}, journal = {Bioinformatics}, author = {Raney, Brian J. and Dreszer, Timothy R. and Barber, Galt P. and Clawson, Hiram and Fujita, Pauline A. and Wang, Ting and Nguyen, Ngan and Paten, Benedict and Zweig, Ann S. and Karolchik, Donna and Kent, W. 
James}, month = apr, year = {2014}, pages = {1003--1005}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/D2DPKLJ9/Raney et al. - 2014 - Track data hubs enable visualization of user-defin.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/E4FZ9PLF/232409.html:text/html} } @article{wu_biogps:_2016, title = {{BioGPS}: building your own mash-up of gene annotations and expression profiles}, volume = {44}, issn = {0305-1048}, shorttitle = {{BioGPS}}, url = {https://academic.oup.com/nar/article/44/D1/D313/2502613}, doi = {10.1093/nar/gkv1104}, abstract = {Abstract. BioGPS (http://biogps.org) is a centralized gene-annotation portal that enables researchers to access distributed gene annotation resources. This art}, language = {en}, number = {D1}, urldate = {2019-10-24}, journal = {Nucleic Acids Research}, author = {Wu, Chunlei and Jin, Xuefeng and Tsueng, Ginger and Afrasiabi, Cyrus and Su, Andrew I.}, month = jan, year = {2016}, pages = {D313--D316}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/J2ZC7W2T/Wu et al. - 2016 - BioGPS building your own mash-up of gene annotati.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/6ZC4YZDM/2502613.html:text/html} } @article{ong_ctcf:_2014, title = {{CTCF}: an architectural protein bridging genome topology and function}, volume = {15}, copyright = {2014 Nature Publishing Group}, issn = {1471-0064}, shorttitle = {{CTCF}}, url = {https://www.nature.com/articles/nrg3663}, doi = {10.1038/nrg3663}, abstract = {The eukaryotic genome is organized in the three-dimensional nuclear space in a specific manner that is both a cause and a consequence of its function. This organization is partly established by a special class of architectural proteins, of which CCCTC-binding factor (CTCF) is the best characterized. 
Although CTCF has been assigned various roles that are often contradictory, new results now help to draw a unifying model to explain the many functions of this protein. CTCF creates boundaries between topologically associating domains in chromosomes and, within these domains, facilitates interactions between transcription regulatory sequences. Thus, CTCF links the architecture of the genome to its function.}, language = {en}, number = {4}, urldate = {2019-10-30}, journal = {Nature Reviews Genetics}, author = {Ong, Chin-Tong and Corces, Victor G.}, month = apr, year = {2014}, pages = {234--246}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TSYLBXP8/Ong and Corces - 2014 - CTCF an architectural protein bridging genome top.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TXTPY6FE/nrg3663.html:text/html} } @article{bonev_organization_2016, title = {Organization and function of the 3D genome}, volume = {17}, copyright = {2016 Nature Publishing Group}, issn = {1471-0064}, url = {https://www.nature.com/articles/nrg.2016.112}, doi = {10.1038/nrg.2016.112}, abstract = {Understanding how chromatin is organized within the nucleus and how this 3D architecture influences gene regulation, cell fate decisions and evolution are major questions in cell biology. Despite spectacular progress in this field, we still know remarkably little about the mechanisms underlying chromatin structure and how it can be established, reset and maintained. 
In this Review, we discuss the insights into chromatin architecture that have been gained through recent technological developments in quantitative biology, genomics and cell and molecular biology approaches and explain how these new concepts have been used to address important biological questions in development and disease.}, language = {en}, number = {11}, urldate = {2019-10-30}, journal = {Nature Reviews Genetics}, author = {Bonev, Boyan and Cavalli, Giacomo}, month = nov, year = {2016}, pages = {661--678}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5U5C956N/Bonev and Cavalli - 2016 - Organization and function of the 3D genome.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/A777IHZ4/nrg.2016.html:text/html} } @article{losada_cohesin_2014, title = {Cohesin in cancer: chromosome segregation and beyond}, volume = {14}, copyright = {2014 Nature Publishing Group}, issn = {1474-1768}, shorttitle = {Cohesin in cancer}, url = {https://www.nature.com/articles/nrc3743}, doi = {10.1038/nrc3743}, abstract = {Cohesin is an evolutionarily conserved, four-subunit complex that entraps DNA fibres within its ring-shaped structure. It was originally identified and named for its role in mediating sister chromatid cohesion, which is essential for chromosome segregation and DNA repair. Increasing evidence indicates that cohesin participates in other processes that involve DNA looping, most importantly, transcriptional regulation. Mutations in genes encoding cohesin subunits and other regulators of the complex have recently been identified in several types of tumours. 
Whether aneuploidy that results from chromosome missegregation is the major contribution of cohesin mutations to cancer progression is under debate.}, language = {en}, number = {6}, urldate = {2019-10-30}, journal = {Nature Reviews Cancer}, author = {Losada, Ana}, month = jun, year = {2014}, pages = {389--393}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5Z9GESZ9/Losada - 2014 - Cohesin in cancer chromosome segregation and beyo.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/438AGVU7/nrc3743.html:text/html} } @article{trifonov_cracking_2011, title = {Cracking the chromatin code: {Precise} rule of nucleosome positioning}, volume = {8}, issn = {1571-0645}, shorttitle = {Cracking the chromatin code}, url = {http://www.sciencedirect.com/science/article/pii/S1571064511000054}, doi = {10.1016/j.plrev.2011.01.004}, abstract = {Various aspects of packaging DNA in eukaryotic cells are outlined in physical rather than biological terms. The informational and physical nature of packaging instructions encoded in DNA sequences is discussed with the emphasis on signal processing difficulties – very low signal-to-noise ratio and high degeneracy of the nucleosome positioning signal. As the author has been contributing to the field from its very onset in 1980, the review is mostly focused at the works of the author and his colleagues. The leading concept of the overview is the role of deformational properties of DNA in the nucleosome positioning. The target of the studies is to derive the DNA bendability matrix describing where along the DNA various dinucleotide elements should be positioned, to facilitate its bending in the nucleosome. Three different approaches are described leading to derivation of the DNA deformability sequence pattern, which is a simplified linear presentation of the bendability matrix. 
All three approaches converge to the same unique sequence motif CGRAAATTTYCG or, in binary form, YRRRRRYYYYYR, both representing the chromatin code.}, language = {en}, number = {1}, urldate = {2019-11-08}, journal = {Physics of Life Reviews}, author = {Trifonov, Edward N.}, month = mar, year = {2011}, keywords = {Nucleosome mapping, DNA bendability, Dinucleotide periodicity, Matrix of bendability, Nucleosome sequence pattern, Signal processing}, pages = {39--50}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/T2Y4DY4L/Trifonov - 2011 - Cracking the chromatin code Precise rule of nucle.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/VPWWSZR4/S1571064511000054.html:text/html} } @article{ioshikhes_variety_2011, title = {Variety of genomic {DNA} patterns for nucleosome positioning}, volume = {21}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/21/11/1863}, doi = {10.1101/gr.116228.110}, abstract = {Precise positioning of nucleosomes along DNA is important for a variety of gene regulatory processes. Among the factors directing nucleosome positioning, the DNA sequence is highly important. Two main classes of nucleosome positioning sequence (NPS) patterns have previously been described. In the first class, AA, TT, and other WW dinucleotides (where W is A or T) tend to occur together (in-phase) in the major groove of DNA closest to the histone octamer surface, while SS dinucleotides (where S is G or C) are predominantly positioned in the major groove facing outward. In the second class, AA and TT are structurally separated (AA backbone near the histone octamer, and TT backbone further away), but grouped with other RR (where R is purine A or G) and YY (where Y is pyrimidine C or T) dinucleotides. As a result, the RR/YY pattern includes counter-phase AA/TT distributions. 
We describe here anti-NPS patterns, which are inverse to the conventional NPS patterns: WW runs inverse to SS, and RR inverse to YY. Evidence for the biological relevance of anti-NPS patterns is presented.}, language = {en}, number = {11}, urldate = {2019-11-13}, journal = {Genome Research}, author = {Ioshikhes, Ilya and Hosid, Sergey and Pugh, B. Franklin}, month = nov, year = {2011}, pmid = {21750105}, pages = {1863--1871}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/GR4NYC26/Ioshikhes et al. - 2011 - Variety of genomic DNA patterns for nucleosome pos.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/ML77TLKJ/1863.html:text/html} } @article{boller_defining_2018, title = {Defining {B} {Cell} {Chromatin}: {Lessons} from {EBF}1}, volume = {34}, issn = {0168-9525}, shorttitle = {Defining {B} {Cell} {Chromatin}}, url = {http://www.sciencedirect.com/science/article/pii/S0168952517302342}, doi = {10.1016/j.tig.2017.12.014}, abstract = {Hematopoiesis is regulated by signals from the microenvironment, transcription factor networks, and changes of the epigenetic landscape. Transcription factors interact with and shape chromatin to allow for lineage- and cell type-specific changes in gene expression. During B lymphopoiesis, epigenetic regulation is observed in multilineage progenitors in which a specific chromatin context is established, at the onset of the B cell differentiation when early B cell factor 1 (EBF1) induces lineage-specific changes in chromatin, during V(D)J recombination and after antigen-driven activation of B cells and terminal differentiation. 
In this review, we discuss the epigenetic changes underlying B cell differentiation, focusing on the role of transcription factor EBF1 in B cell lineage priming.}, language = {en}, number = {4}, urldate = {2019-11-13}, journal = {Trends in Genetics}, author = {Boller, Sören and Li, Rui and Grosschedl, Rudolf}, month = apr, year = {2018}, keywords = {B cell differentiation, chromatin, Early B cell factor 1, transcription factor network}, pages = {257--269}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/42VZQNS2/Boller et al. - 2018 - Defining B Cell Chromatin Lessons from EBF1.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/NCBIG74Y/S0168952517302342.html:text/html} } @article{hagman_early_2005, title = {Early {B}-cell factor ‘pioneers’ the way for {B}-cell development}, volume = {26}, issn = {1471-4906}, url = {http://www.sciencedirect.com/science/article/pii/S1471490605001808}, doi = {10.1016/j.it.2005.07.001}, abstract = {Early B-cell factor (EBF) is a DNA-binding protein required for B-cell lymphopoiesis. The lack of EBF results in an early developmental blockade, including the lack of functional B cells and Igs. Recent studies have elucidated a central role for EBF in the specification of B-lineage cells. EBF directs progenitor cells to undergo B lymphopoiesis and activates transcription of B cell-specific genes in the absence of upstream regulators. How EBF mediates these effects has yet to be thoroughly explored, however, it initiates epigenetic modifications necessary for gene activation and the function of other transcriptional regulators, including Pax5. 
Together, these observations suggest a molecular basis for the role of EBF in the hierarchical network of factors that control B lymphopoiesis.}, language = {en}, number = {9}, urldate = {2019-11-13}, journal = {Trends in Immunology}, author = {Hagman, James and Lukin, Kara}, month = sep, year = {2005}, pages = {455--461}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/TPM8E4B8/Hagman and Lukin - 2005 - Early B-cell factor ‘pioneers’ the way for B-cell .pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Y998GZ4Q/S1471490605001808.html:text/html} } @article{boller_pioneering_2016, title = {Pioneering {Activity} of the {C}-{Terminal} {Domain} of {EBF}1 {Shapes} the {Chromatin} {Landscape} for {B} {Cell} {Programming}}, volume = {44}, issn = {1074-7613}, url = {http://www.sciencedirect.com/science/article/pii/S1074761316300590}, doi = {10.1016/j.immuni.2016.02.021}, abstract = {Lymphopoiesis requires the activation of lineage-specific genes embedded in naive, inaccessible chromatin or in primed, accessible chromatin. The mechanisms responsible for de novo gain of chromatin accessibility, known as “pioneer” function, remain poorly defined. Here, we showed that the EBF1 C-terminal domain (CTD) is required for the regulation of a specific gene set involved in B cell fate decision and differentiation, independently of activation and repression functions. Using genome-wide analysis of DNaseI hypersensitivity and DNA methylation in multipotent Ebf1−/− progenitors and derivative EBF1wt- or EBF1ΔC-expressing cells, we found that the CTD promoted chromatin accessibility and DNA demethylation in previously naive chromatin. The CTD allowed EBF1 to bind at inaccessible genomic regions that offer limited co-occupancy by other transcription factors, whereas the CTD was dispensable for EBF1 binding at regions that are occupied by multiple transcription factors. 
Thus, the CTD enables EBF1 to confer permissive lineage-specific changes in progenitor chromatin landscape.}, language = {en}, number = {3}, urldate = {2019-11-13}, journal = {Immunity}, author = {Boller, Sören and Ramamoorthy, Senthilkumar and Akbas, Duygu and Nechanitzky, Robert and Burger, Lukas and Murr, Rabih and Schübeler, Dirk and Grosschedl, Rudolf}, month = mar, year = {2016}, keywords = {pioneer transcription factor, chromatin, B lymphopoiesis, DNA methylation, DNaseI hypersensitivity, EBF1}, pages = {527--541}, file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/EGMEHDT2/Boller et al. - 2016 - Pioneering Activity of the C-Terminal Domain of EB.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Y5I8DSUC/S1074761316300590.html:text/html} } @article{maier_early_2004, title = {Early {B} cell factor cooperates with {Runx}1 and mediates epigenetic changes associated with mb-1 transcription}, volume = {5}, copyright = {2004 Nature Publishing Group}, issn = {1529-2916}, url = {https://www.nature.com/articles/ni1119}, doi = {10.1038/ni1119}, abstract = {Cd79a (called mb-1 here) encodes the Ig-α signaling component of the B cell receptor. The early B cell–specific mb-1 promoter was hypermethylated at CpG dinucleotides in hematopoietic stem cells but became progressively unmethylated as B cell development proceeded. The transcription factor Pax5 activated endogenous mb-1 transcription in a plasmacytoma cell line, but could not when the promoter was methylated. In this context, early B cell factor (EBF), a transcription factor required for B lymphopoiesis, potentiated activation of mb-1 by Pax5. EBF and the basic helix-loop-helix transcription factor E47 each contributed to epigenetic modifications of the mb-1 promoter, including CpG demethylation and nucleosomal remodeling. EBF function was enhanced by interaction with the transcription factor Runx1. 
These data suggest a molecular basis for the hierarchical dependence of Pax5 function on EBF and E2A in B lymphocyte development.}, language = {en}, number = {10}, urldate = {2019-11-14}, journal = {Nature Immunology}, author = {Maier, Holly and Ostraat, Rachel and Gao, Hua and Fields, Scott and Shinton, Susan A. and Medina, Kay L. and Ikawa, Tomokatsu and Murre, Cornelis and Singh, Harinder and Hardy, Richard R. and Hagman, James}, month = oct, year = {2004}, pages = {1069--1077}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/L3YJ5LS9/Maier et al. - 2004 - Early B cell factor cooperates with Runx1 and medi.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9DE4QLC8/ni1119.html:text/html} } @article{kent_blatblast-like_2002, title = {{BLAT}—{The} {BLAST}-{Like} {Alignment} {Tool}}, volume = {12}, issn = {1088-9051, 1549-5469}, url = {http://genome.cshlp.org/content/12/4/656}, doi = {10.1101/gr.229202}, abstract = {Analyzing vertebrate genomes requires rapid mRNA/DNA and cross-species protein alignments. A new tool, BLAT, is more accurate and 500 times faster than popular existing tools for mRNA/DNA alignments and 50 times faster for protein alignments at sensitivity settings typically used when comparing vertebrate sequences. BLAT's speed stems from an index of all nonoverlapping K-mers in the genome. This index fits inside the RAM of inexpensive computers, and need only be computed once for each genome assembly. BLAT has several major stages. It uses the index to find regions in the genome likely to be homologous to the query sequence. It performs an alignment between homologous regions. It stitches together these aligned regions (often exons) into larger alignments (typically genes). Finally, BLAT revisits small internal exons possibly missed at the first stage and adjusts large gap boundaries that have canonical splice sites where feasible. This paper describes how BLAT was optimized. 
Effects on speed and sensitivity are explored for various K-mer sizes, mismatch schemes, and number of required index matches. BLAT is compared with other alignment programs on various test sets and then used in several genome-wide applications. http://genome.ucsc.edu hosts a web-basedBLAT server for the human genome.}, language = {en}, number = {4}, urldate = {2019-11-15}, journal = {Genome Research}, author = {Kent, W. James}, month = apr, year = {2002}, pmid = {11932250}, pages = {656--664}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/X6ZVQ37E/Kent - 2002 - BLAT—The BLAST-Like Alignment Tool.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/IAR97Q78/656.html:text/html} } @article{kurotaki_transcriptional_2017, title = {Transcriptional control of monocyte and macrophage development}, volume = {29}, issn = {0953-8178}, url = {https://academic.oup.com/intimm/article/29/3/97/3098307}, doi = {10.1093/intimm/dxx016}, abstract = {The origins, tissues and transcription networks controlling monocytes/macrophages}, language = {en}, number = {3}, urldate = {2019-11-18}, journal = {International Immunology}, author = {Kurotaki, Daisuke and Sasaki, Haruka and Tamura, Tomohiko}, month = mar, year = {2017}, pages = {97--107}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/53GICCP4/Kurotaki et al. 
- 2017 - Transcriptional control of monocyte and macrophage.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/H8RHN7VY/3098307.html:text/html} } @article{dekkers_human_2019, title = {Human monocyte-to-macrophage differentiation involves highly localized gain and loss of {DNA} methylation at transcription factor binding sites}, volume = {12}, issn = {1756-8935}, url = {https://doi.org/10.1186/s13072-019-0279-4}, doi = {10.1186/s13072-019-0279-4}, abstract = {Macrophages and their precursors monocytes play a key role in inflammation and chronic inflammatory disorders. Monocyte-to-macrophage differentiation and activation programs are accompanied by significant epigenetic remodeling where DNA methylation associates with cell identity. Here we show that DNA methylation changes characteristic for monocyte-to-macrophage differentiation occur at transcription factor binding sites, and, in contrast to what was previously described, are generally highly localized and encompass both losses and gains of DNA methylation.}, number = {1}, urldate = {2019-11-18}, journal = {Epigenetics \& Chromatin}, author = {Dekkers, Koen F. and Neele, Annette E. and Jukema, J. Wouter and Heijmans, Bastiaan T. and de Winther, Menno P. J.}, month = jun, year = {2019}, pages = {34}, file = {Full Text:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/9MNQKL4L/Dekkers et al. - 2019 - Human monocyte-to-macrophage differentiation invol.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/HV9LQVAS/s13072-019-0279-4.html:text/html} } @article{rico_comparative_2017, title = {Comparative analysis of neutrophil and monocyte epigenomes}, copyright = {© 2017, Posted by Cold Spring Harbor Laboratory. 
This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, url = {https://www.biorxiv.org/content/10.1101/237784v1}, doi = {10.1101/237784}, abstract = {{\textless}h3{\textgreater}ABSTRACT{\textless}/h3{\textgreater} {\textless}p{\textgreater}Neutrophils and monocytes provide a first line of defense against infections as part of the innate immune system. Here we report the integrated analysis of transcriptomic and epigenetic landscapes for circulating monocytes and neutrophils with the aim to enable downstream interpretation and functional validation of key regulatory elements in health and disease. We collected RNA-seq data, ChIP-seq of six histone modifications and of DNA methylation by bisulfite sequencing at base pair resolution from up to 6 individuals per cell type. Chromatin segmentation analyses suggested that monocytes have a higher number of cell-specific enhancer regions (4-fold) compared to neutrophils. This highly plastic epigenome is likely indicative of the greater differentiation potential of monocytes into macrophages, dendritic cells and osteoclasts. In contrast, most of the neutrophil-specific features tend to be characterized by repressed chromatin, reflective of their status as terminally differentiated cells. Enhancers were the regions where most of differences in DNA methylation between cells were observed, with monocyte-specific enhancers being generally hypomethylated. Monocytes show a substantially higher gene expression levels than neutrophils, in line with epigenomic analysis revealing that gene more active elements in monocytes. Our analyses suggest that the overexpression of c-Myc in monocytes and its binding to monocyte-specific enhancers could be an important contributor to these differences. 
Altogether, our study provides a comprehensive epigenetic chart of chromatin states in primary human neutrophils and monocytes, thus providing a valuable resource for studying the regulation of the human innate immune system.{\textless}/p{\textgreater}}, language = {en}, urldate = {2019-11-18}, journal = {bioRxiv}, author = {Rico, Daniel and Martens, Joost HA and Downes, Kate and Carrillo-de-Santa-Pau, Enrique and Pancaldi, Vera and Breschi, Alessandra and Richardson, David and Heath, Simon and Saeed, Sadia and Frontini, Mattia and Chen, Lu and Watt, Stephen and Müller, Fabian and Clarke, Laura and Kerstens, Hindrik HD and Wilder, Steven P. and Palumbo, Emilio and Djebali, Sarah and Raineri, Emanuele and Merkel, Angelika and Esteve-Codina, Anna and Sultan, Marc and Bommel, Alena van and Gut, Marta and Yaspo, Marie-Laure and Rubio, Miriam and Fernandez, José María and Attwood, Anthony and Torre, Victor de la and Royo, Romina and Fragkogianni, Stamatina and Gelpí, Josep Lluis and Torrents, David and Iotchkova, Valentina and Logie, Colin and Aghajanirefah, Ali and Singh, Abhishek A. and Janssen-Megens, Eva M. and Berentsen, Kim and Erber, Wendy and Rendon, Augusto and Kostadima, Myrto and Loos, Remco and Ent, Martijn A. van der and Kaan, Anita and Sharifi, Nilofar and Paul, Dirk S. and Ifrim, Daniela C. and Quintin, Jessica and Love, Michael I. and Pisano, David G. and Burden, Frances and Foad, Nicola and Farrow, Samantha and Zerbino, Daniel R. and Dunham, Ian and Kuijpers, Tacow and Lehrach, Hans and Lengauer, Thomas and Bertone, Paul and Netea, Mihai G. and Vingron, Martin and Beck, Stephan and Flicek, Paul and Gut, Ivo and Ouwehand, Willem H. and Bock, Christoph and Soranzo, Nicole and Guigo, Rodericw and Valencia, Alfonso and Stunnenberg, Hendrik G.}, month = dec, year = {2017}, pages = {237784}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/H24JSGBM/Rico et al. 
- 2017 - Comparative analysis of neutrophil and monocyte ep.pdf:application/pdf;Rico et al. - 2017 - Supplemental.pdf:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/H24JSGBM/Rico et al. - 2017 - Supplemental.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/YBAUUVC6/237784v1.html:text/html} } @incollection{jolma_methods_2011-1, series = {Subcellular {Biochemistry}}, title = {Methods for {Analysis} of {Transcription} {Factor} {DNA}-{Binding} {Specificity} {In} {Vitro}, {Chapter} 11, {Interactions} of {Transcription} {Factors} with {Chromatin}}, copyright = {©2011 Springer Science+Business Media B.V.}, isbn = {978-90-481-9068-3 978-90-481-9069-0}, url = {http://link.springer.com/chapter/10.1007/978-90-481-9069-0_7}, abstract = {Transcription of genes during development and in response to environmental stimuli is determined by genomic DNA sequence. The DNA sequences regulating transcription are read by sequence-specific transcription factors (TFs) that recognize relatively short sequences, generally between four and twenty base pairs in length. Transcriptional regulation generally requires binding of multiple TFs in close proximity to each other. Mechanistic understanding of transcription in an organism thus requires detailed knowledge of binding affinities of all its TFs to all possible DNA sequences, and the co–operative interactions between the TFs. However, very little is known about such co-operative binding interactions, and even the simple TF-DNA binding information exists only for a very small proportion of all TFs – for example, mammals have approximately 1,300–2,000 TFs [1, 2], yet the largest public databases for TF binding specificity, Jaspar and Uniprobe [3, 4] currently list only approximately 500 moderate to high resolution profiles for human or mouse. This lack of knowledge is in part due to the fact that analysis of TF DNA binding has been laborious and expensive. 
In this chapter, we review methods that can be used to determine binding specificity of TFs to DNA, mainly focusing on recently developed assays that allow high-resolution analysis of TF binding specificity in relatively high throughput.}, language = {en}, number = {52}, urldate = {2016-03-09}, booktitle = {A {Handbook} of {Transcription} {Factors}}, publisher = {Springer Netherlands}, author = {Jolma, Arttu and Taipale, Jussi}, editor = {Hughes, Timothy R.}, year = {2011}, doi = {10.1007/978-90-481-9069-0_7}, - keywords = {Affinity, Biochemistry, general, Biomedicine general, Cell Biology, Co-operative binding, Nucleic Acid Chemistry, Protein binding microarrays, Protein–DNA interactions, SELEX}, + keywords = {SELEX, Biomedicine general, Biochemistry, general, Nucleic Acid Chemistry, Cell Biology, Protein–DNA interactions, Co-operative binding, Affinity, Protein binding microarrays}, pages = {223--259} } @incollection{jolma_methods_2011-2, series = {Subcellular {Biochemistry}}, title = {Methods for {Analysis} of {Transcription} {Factor} {DNA}-{Binding} {Specificity} {In} {Vitro}, {Chapter} 9, {How} {Transcription} {Factors} {Identify} {Regulatory} {Sites} in {Genomic} {Sequence}}, copyright = {©2011 Springer Science+Business Media B.V.}, isbn = {978-90-481-9068-3 978-90-481-9069-0}, url = {http://link.springer.com/chapter/10.1007/978-90-481-9069-0_7}, abstract = {Transcription of genes during development and in response to environmental stimuli is determined by genomic DNA sequence. The DNA sequences regulating transcription are read by sequence-specific transcription factors (TFs) that recognize relatively short sequences, generally between four and twenty base pairs in length. Transcriptional regulation generally requires binding of multiple TFs in close proximity to each other. 
Mechanistic understanding of transcription in an organism thus requires detailed knowledge of binding affinities of all its TFs to all possible DNA sequences, and the co–operative interactions between the TFs. However, very little is known about such co-operative binding interactions, and even the simple TF-DNA binding information exists only for a very small proportion of all TFs – for example, mammals have approximately 1,300–2,000 TFs [1, 2], yet the largest public databases for TF binding specificity, Jaspar and Uniprobe [3, 4] currently list only approximately 500 moderate to high resolution profiles for human or mouse. This lack of knowledge is in part due to the fact that analysis of TF DNA binding has been laborious and expensive. In this chapter, we review methods that can be used to determine binding specificity of TFs to DNA, mainly focusing on recently developed assays that allow high-resolution analysis of TF binding specificity in relatively high throughput.}, language = {en}, number = {52}, urldate = {2016-03-09}, booktitle = {A {Handbook} of {Transcription} {Factors}}, publisher = {Springer Netherlands}, author = {Jolma, Arttu and Taipale, Jussi}, editor = {Hughes, Timothy R.}, year = {2011}, doi = {10.1007/978-90-481-9069-0_7}, - keywords = {Affinity, Biochemistry, general, Biomedicine general, Cell Biology, Co-operative binding, Nucleic Acid Chemistry, Protein binding microarrays, Protein–DNA interactions, SELEX}, + keywords = {SELEX, Biomedicine general, Biochemistry, general, Nucleic Acid Chemistry, Cell Biology, Protein–DNA interactions, Co-operative binding, Affinity, Protein binding microarrays}, pages = {193--204} } @article{hyun_writing_2017, title = {Writing, erasing and reading histone lysine methylations}, volume = {49}, copyright = {2017 The Author(s)}, issn = {2092-6413}, url = {https://www.nature.com/articles/emm201711}, doi = {10.1038/emm.2017.11}, abstract = {Histone modifications are key epigenetic regulatory features that have 
important roles in many cellular events. Lysine methylations mark various sites on the tail and globular domains of histones and their levels are precisely balanced by the action of methyltransferases (‘writers’) and demethylases (‘erasers’). In addition, distinct effector proteins (‘readers’) recognize specific methyl-lysines in a manner that depends on the neighboring amino-acid sequence and methylation state. Misregulation of histone lysine methylation has been implicated in several cancers and developmental defects. Therefore, histone lysine methylation has been considered a potential therapeutic target, and clinical trials of several inhibitors of this process have shown promising results. A more detailed understanding of histone lysine methylation is necessary for elucidating complex biological processes and, ultimately, for developing and improving disease treatments. This review summarizes enzymes responsible for histone lysine methylation and demethylation and how histone lysine methylation contributes to various biological processes. Elucidating how enzymes add and subtract methyl groups to the proteins that package DNA could lead to new disease treatments. In a review article, Jaehoon Kim and colleagues from the Korea Advanced Institute of Science and Technology in Daejeon, South Korea, summarize the mechanisms by which histone proteins, which assemble to form spool-like complexes around which DNA wraps into more compact units, are modified and how misregulation of this process can lead to cancer, developmental defects and other health problems. The authors focus on how methyl groups are added to and removed from residues of the amino acid lysine within histone proteins. 
They liken certain enzymes to writers, erasers and readers of histone lysine methylation, and make the case that these enzymes should be investigated as potential therapeutic targets.}, language = {en}, number = {4}, urldate = {2019-11-25}, journal = {Experimental \& Molecular Medicine}, author = {Hyun, Kwangbeom and Jeon, Jongcheol and Park, Kihyun and Kim, Jaehoon}, month = apr, year = {2017}, pages = {e324--e324}, file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/S88BVSYX/Hyun et al. - 2017 - Writing, erasing and reading histone lysine methyl.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/5BCLSXCP/emm201711.html:text/html} +} + +@article{west_nucleosomal_2014, + title = {Nucleosomal occupancy changes locally over key regulatory regions during cell differentiation and reprogramming}, + volume = {5}, + copyright = {2014 The Author(s)}, + issn = {2041-1723}, + url = {https://www.nature.com/articles/ncomms5719}, + doi = {10.1038/ncomms5719}, + abstract = {Changes in chromatin structure impact gene expression programs by modulating accessibility to the transcription machinery. Here, West et al. explore differences in nucleosome occupancy between mammalian pluripotent and somatic cells and uncover regulatory regions likely to play key roles in determining cell identity.}, + language = {en}, + number = {1}, + urldate = {2019-11-26}, + journal = {Nature Communications}, + author = {West, Jason A. and Cook, April and Alver, Burak H. and Stadtfeld, Matthias and Deaton, Aimee M. and Hochedlinger, Konrad and Park, Peter J. and Tolstorukov, Michael Y. and Kingston, Robert E.}, + month = aug, + year = {2014}, + pages = {1--12}, + file = {Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/QVN9IDUT/West et al. 
- 2014 - Nucleosomal occupancy changes locally over key reg.pdf:application/pdf;Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/Q4MVURG5/ncomms5719.html:text/html} +} + +@article{kubik_nucleosome_2015, + title = {Nucleosome {Stability} {Distinguishes} {Two} {Different} {Promoter} {Types} at {All} {Protein}-{Coding} {Genes} in {Yeast}}, + volume = {60}, + issn = {1097-2765}, + url = {http://www.sciencedirect.com/science/article/pii/S1097276515007698}, + doi = {10.1016/j.molcel.2015.10.002}, + abstract = {Previous studies indicate that eukaryotic promoters display a stereotypical chromatin landscape characterized by a well-positioned +1 nucleosome near the transcription start site and an upstream −1 nucleosome that together demarcate a nucleosome-free (or -depleted) region. Here we present evidence that there are two distinct types of promoters distinguished by the resistance of the −1 nucleosome to micrococcal nuclease digestion. These different architectures are characterized by two sequence motifs that are broadly deployed at one set of promoters where a nuclease-sensitive (“fragile”) nucleosome forms, but concentrated in a narrower, nucleosome-free region at all other promoters. The RSC nucleosome remodeler acts through the motifs to establish stable +1 and −1 nucleosome positions, while binding of a small set of general regulatory (pioneer) factors at fragile nucleosome promoters plays a key role in their destabilization. 
We propose that the fragile nucleosome promoter architecture is adapted for regulation of highly expressed, growth-related genes.}, + language = {en}, + number = {3}, + urldate = {2019-11-26}, + journal = {Molecular Cell}, + author = {Kubik, Slawomir and Bruzzone, Maria Jessica and Jacquet, Philippe and Falcone, Jean-Luc and Rougemont, Jacques and Shore, David}, + month = nov, + year = {2015}, + pages = {422--434}, + file = {ScienceDirect Full Text PDF:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/KFG8ARBW/Kubik et al. - 2015 - Nucleosome Stability Distinguishes Two Different P.pdf:application/pdf;ScienceDirect Snapshot:/home/groux/.zotero/zotero/mgp9vo2c.default/zotero/storage/U7XBQFJN/S1097276515007698.html:text/html} +} + +@article{dreos_influence_2016, + title = {Influence of {Rotational} {Nucleosome} {Positioning} on {Transcription} {Start} {Site} {Selection} in {Animal} {Promoters}}, + volume = {12}, + issn = {1553-7358}, + url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005144}, + doi = {10.1371/journal.pcbi.1005144}, + abstract = {The recruitment of RNA-Pol-II to the transcription start site (TSS) is an important step in gene regulation in all organisms. Core promoter elements (CPE) are conserved sequence motifs that guide Pol-II to the TSS by interacting with specific transcription factors (TFs). However, only a minority of animal promoters contains CPEs. It is still unknown how Pol-II selects the TSS in their absence. Here we present a comparative analysis of promoters’ sequence composition and chromatin architecture in five eukaryotic model organisms, which shows the presence of common and unique DNA-encoded features used to organize chromatin. Analysis of Pol-II initiation patterns uncovers that, in the absence of certain CPEs, there is a strong correlation between the spread of initiation and the intensity of the 10 bp periodic signal in the nearest downstream nucleosome. 
Moreover, promoters’ primary and secondary initiation sites show a characteristic 10 bp periodicity in the absence of CPEs. We also show that DNA natural variants in the region immediately downstream the TSS are able to affect both the nucleosome-DNA affinity and Pol-II initiation pattern. These findings support the notion that, in addition to CPEs mediated selection, sequence–induced nucleosome positioning could be a common and conserved mechanism of TSS selection in animals.}, + language = {en}, + number = {10}, + urldate = {2019-11-26}, + journal = {PLOS Computational Biology}, + author = {Dreos, René and Ambrosini, Giovanna and Bucher, Philipp}, + month = oct, + year = {2016}, + keywords = {Chromatin, Sequence motif analysis, Nucleosomes, Invertebrate genomics, Drosophila melanogaster, Nucleosome mapping, Genomic signal processing, Caenorhabditis elegans}, + pages = {e1005144} } \ No newline at end of file diff --git a/tail/cv.aux b/tail/cv.aux index f8739bb..4f32e7c 100644 --- a/tail/cv.aux +++ b/tail/cv.aux @@ -1,49 +1,49 @@ \relax \providecommand\hyper@newdestlabel[2]{} -\@writefile{toc}{\contentsline {chapter}{Curriculum Vitae}{121}{section*.64}} +\@writefile{toc}{\contentsline {chapter}{Curriculum Vitae}{123}{section*.65}} \@setckpt{tail/cv}{ -\setcounter{page}{123} +\setcounter{page}{125} \setcounter{equation}{0} \setcounter{enumi}{13} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} \setcounter{footnote}{0} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{chapter}{1} \setcounter{section}{1} \setcounter{subsection}{0} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} \setcounter{figure}{19} \setcounter{table}{0} -\setcounter{NAT@ctr}{103} +\setcounter{NAT@ctr}{108} \setcounter{FBcaption@count}{0} \setcounter{ContinuedFloat}{0} \setcounter{KVtest}{0} \setcounter{subfigure}{0} \setcounter{subfigure@save}{0} \setcounter{lofdepth}{1} \setcounter{subtable}{0} \setcounter{subtable@save}{0} 
\setcounter{lotdepth}{1} \setcounter{lips@count}{2} \setcounter{lstnumber}{1} \setcounter{Item}{13} \setcounter{Hfootnote}{0} \setcounter{bookmark@seq@number}{0} \setcounter{AM@survey}{0} \setcounter{ttlp@side}{0} \setcounter{myparts}{0} \setcounter{parentequation}{0} \setcounter{AlgoLine}{39} \setcounter{algocfline}{3} \setcounter{algocfproc}{3} \setcounter{algocf}{3} \setcounter{float@type}{8} \setcounter{nlinenum}{0} \setcounter{lstlisting}{0} \setcounter{section@level}{0} }