Page MenuHomec4science

ch_softwares.aux
No OneTemporary

File Metadata

Created
Fri, May 17, 17:59

ch_softwares.aux

\relax
\providecommand\hyper@newdestlabel[2]{}
\citation{ambrosini_pwmscan:_2018}
\citation{ambrosini_pwmscan:_2018}
\citation{ambrosini_pwmscan:_2018}
\citation{khan_jaspar_2018}
\citation{kulakovskiy_hocomoco:_2018}
\@writefile{toc}{\contentsline {chapter}{\numberline {1}Published softwares}{3}{chapter.1}}
\@writefile{lof}{\addvspace {10\p@ }}
\@writefile{lot}{\addvspace {10\p@ }}
\@writefile{loa}{\addvspace {10\p@ }}
\@writefile{toc}{\contentsline {chapter}{Published softwares}{3}{chapter.1}}
\@writefile{toc}{\contentsline {section}{\numberline {1.1}PWMScan}{3}{section.1.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.1}Introduction}{3}{subsection.1.1.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces \textbf {PWMScan workflow :} the input is composed of a PWM and a score threshold specifying the minimum score for a sequence to achieved to be considered as a match. Letter probability matrices or count matrices are also accepted and are converted into PWMs. The score threshold can also be given as a p-value or a percentage of the maximum score, in which case it is converted into a threshold score. Based on the length of the PWM, Bowtie or pwm\_scan can be used to find the matches on the genome. If Bowtie is used, the set of k-mers achieving a better score than the threshold score is computed using branch-and-bound algorithm (mba) and mapped on the genome. On the other hand, if matrix\_scan is used, the PWM is used to score every possible sub-sequence in the genome. The regions corresponding to the sequences achieving a score at least as good as the threshold score are then returned under BED format. Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }}{4}{figure.caption.8}}
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{softwares_pwmscan_pipeline}{{1.1}{4}{\textbf {PWMScan workflow :} the input is composed of a PWM and a score threshold specifying the minimum score for a sequence to achieved to be considered as a match. Letter probability matrices or count matrices are also accepted and are converted into PWMs. The score threshold can also be given as a p-value or a percentage of the maximum score, in which case it is converted into a threshold score. Based on the length of the PWM, Bowtie or pwm\_scan can be used to find the matches on the genome. If Bowtie is used, the set of k-mers achieving a better score than the threshold score is computed using branch-and-bound algorithm (mba) and mapped on the genome. On the other hand, if matrix\_scan is used, the PWM is used to score every possible sub-sequence in the genome. The regions corresponding to the sequences achieving a score at least as good as the threshold score are then returned under BED format. Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }{figure.caption.8}{}}
\citation{ambrosini_pwmscan:_2018}
\citation{langmead_ultrafast_2009}
\citation{bailey_meme_2009}
\citation{ambrosini_pwmscan:_2018}
\citation{ambrosini_pwmscan:_2018}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.2}Data and methods}{5}{subsection.1.1.2}}
\citation{ambrosini_pwmscan:_2018}
\citation{ambrosini_chip-seq_2016}
\citation{ambrosini_pwmscan:_2018}
\citation{ambrosini_pwmscan:_2018}
\citation{ambrosini_pwmscan:_2018}
\citation{ambrosini_pwmscan:_2018}
\citation{ambrosini_pwmscan:_2018}
\citation{hertz_identification_1990}
\citation{beckstette_fast_2006}
\citation{turatsinze_using_2008}
\citation{heinz_simple_2010}
\citation{grant_fimo:_2011}
\citation{beckstette_fast_2006}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.3}Benchmark}{6}{subsection.1.1.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {1.2}{\ignorespaces \textbf {Benchmark :} PWMScan speed performances were measured and compared with 6 other well known genome scanners. In all cases, the h19 genome sequence was scanned with a 19bp CTCF matrix and a 11bp STAT1 matrix, 10 times. The run times are represented as boxplots. For PWMScan, both pwm\_scan and Bowtie strategies were run. Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }}{7}{figure.caption.9}}
\newlabel{softwares_pwmscan_benchmark}{{1.2}{7}{\textbf {Benchmark :} PWMScan speed performances were measured and compared with 6 other well known genome scanners. In all cases, the h19 genome sequence was scanned with a 19bp CTCF matrix and a 11bp STAT1 matrix, 10 times. The run times are represented as boxplots. For PWMScan, both pwm\_scan and Bowtie strategies were run. Figure and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }{figure.caption.9}{}}
\@writefile{lot}{\contentsline {table}{\numberline {1.1}{\ignorespaces \textbf {Motif scanning software comparison}. The performances of matrix\_scan were assessed by comparing how many of the regions listed by matrix\_scan were also returned by other programs and if the region scores were comparable. For the percentage of overlap with the match list returned by matrix\_scan, the shorter of the two lists always serves as the reference (100\%). For the score correlations with matrix\_scan scores, the Spearman correlation was used. Table and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }}{7}{table.caption.10}}
\newlabel{softwares_pwmscan_benchmark_table}{{1.1}{7}{\textbf {Motif scanning software comparison}. The performances of matrix\_scan were assessed by comparing how many of the regions listed by matrix\_scan were also returned by other programs and if the region scores were comparable. For the percentage of overlap with the match list returned by matrix\_scan, the shorter of the two lists always serves as the reference (100\%). For the score correlations with matrix\_scan scores, the Spearman correlation was used. Table and legend taken and adapted from \citep {ambrosini_pwmscan:_2018}.\relax }{table.caption.10}{}}
\citation{schones_statistical_2007}
\citation{aerts_toucan:_2003}
\citation{fu_motifviz:_2004}
\citation{zhao_tred:_2005}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1.4}Conclusions}{8}{subsection.1.1.4}}
\citation{groux_spar-k:_2019}
\citation{consortium_integrated_2012}
\citation{kundaje_ubiquitous_2012}
\citation{nair_probabilistic_2014}
\citation{groux_spar-k:_2019}
\citation{groux_spar-k:_2019}
\@writefile{toc}{\contentsline {section}{\numberline {1.2}SPar-K}{9}{section.1.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.1}Introduction}{9}{subsection.1.2.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.2}Methods}{9}{subsection.1.2.2}}
\@writefile{lof}{\contentsline {figure}{\numberline {1.3}{\ignorespaces Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method (Figure \ref {fig_s07}). \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{10}{figure.caption.11}}
\newlabel{softwares_spark_dnase}{{1.3}{10}{Partitioning of DNaseI hypersensitivity profiles around SP1 binding sites in K562 cells. The optimal number of clusters was determined using the elbow method (Figure \ref {fig_s07}). \textbf {A.} Input data based on peak summits provided by ENCODE. \textbf {B.} Same regions clustered, re-aligned and oriented by SPar-K. Clusters 1, 2 and 3 are indicated by colored bars in red, blue, and green, respectively. \textbf {C.} MNase-seq read densities for the same regions, ordered, aligned and oriented as in B. \textbf {D.} Predicted SP1 binding motifs for the same regions, ordered, aligned and oriented as in B. \textbf {E.} Proportion of binding sites within each cluster having a confirmed promoter-associated TSS within +/- 300bp. \textbf {F.} Aggregations profiles for DNase-seq (red), MNase-seq (blue), promoter TSS (green) and CAGE-seq data (violet) for cluster 2 (aligned and oriented as in B). \textbf {G.} Motifs found by MEME-ChIP and Tomtom in the narrow footprints of each cluster. (*) known SP1 interactor, (c) central enrichment. Cluster 2 left and right refer to the left and right footprints seen in \textbf {B}. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.11}{}}
\citation{groux_spar-k:_2019}
\citation{groux_spar-k:_2019}
\citation{groux_spar-k:_2019}
\citation{groux_spar-k:_2019}
\citation{groux_spar-k:_2019}
\citation{groux_spar-k:_2019}
\citation{groux_spar-k:_2019}
\citation{groux_spar-k:_2019}
\citation{bailey_meme_2009}
\citation{groux_spar-k:_2019}
\citation{kundaje_ubiquitous_2012}
\@writefile{lof}{\contentsline {figure}{\numberline {1.4}{\ignorespaces \textbf {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{11}{figure.caption.12}}
\newlabel{softwares_spark_ari}{{1.4}{11}{\textbf {Clustering accuracy using random seeding :} to compare the clustering accuracies of the different methods, several simulated dataset containing 3 classes, different coverages (10, 50 and 100 reads per region indicated as "cov10", "cov50" and "cov100") and noise proportions (no noise, 10\% noise, 50\% noise and 90\% noise indicated as "0.0", "0.1", "0.5" and "0.9") were generated. Each dataset was clustered 50 times with each method. The Adjusted Rand Index (ARI) was computed for each partition. The ARI values are displayed as boxplots. SPar-K and ChIPPartitioning were run allowing flipping and shifting. The ARI was measured on each of the resulting data partitions. For SPar-K, "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. "R" stands for "random" and indicates the ARI values obtained when comparing the true cluster labels with a randomly shuffled version of it, 100 times. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.12}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {1.5}{\ignorespaces \textbf {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf {A} Seeding done at random, \textbf {B} seeding done at random and outlier smoothing \textbf {C} seeding done with the K-means++ method \textbf {D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{12}{figure.caption.13}}
\newlabel{softwares_spark_sse}{{1.5}{12}{\textbf {Median SSE :} for the simulated ChIP-seq dataset containing 3 classes, with coverage 100 and no noise, partitioned into 2 to 5 clusters. To judge whether the elbow method could be used to estimate the optimal number of clusters, this dataset was partitioned with SPar-K, allowing flip and shifting, into 2 to 5 clusters, 50 times for each set of parameters. For each number of clusters, the median SSE is shown, +/- 1 standard deviation (bars). \textbf {A} Seeding done at random, \textbf {B} seeding done at random and outlier smoothing \textbf {C} seeding done with the K-means++ method \textbf {D} seeding done with the K-means++ method and outlier smoothing. In all cases, the optimal number of clusters seemed to be 3 (which was the expected value). Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.13}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {1.6}{\ignorespaces \textbf {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }}{12}{figure.caption.14}}
\newlabel{softwares_spark_time}{{1.6}{12}{\textbf {Running times :} to compare the run times of each program, the synthetic dataset with coverage 100 and no noise was partitioned 20 times with each program. The run times (wall clock) in second were measured. For all SPar-K and the regular K-means, the partitions were initialized using a random and K-means++ (indicated as "k++"). For ChIPPartitioning, only a random seeding was used. The partitions were then optimized for 30 iterations at most. For SPar-K and ChIPPartitioning, a shifting of 71 bins and flipping were allowed. For SPar-K, only one thread was used and "smooth" indicates outlier smoothing. For the regular K-means, "eucl." and "corr." refer to the euclidean and correlation distances. Figure and legend taken and adapted from \citep {groux_spar-k:_2019}.\relax }{figure.caption.14}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.3}Results}{13}{subsection.1.2.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2.4}Conclusion}{13}{subsection.1.2.4}}
\@setckpt{main/ch_softwares}{
\setcounter{page}{14}
\setcounter{equation}{0}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{chapter}{1}
\setcounter{section}{2}
\setcounter{subsection}{4}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{6}
\setcounter{table}{1}
\setcounter{NAT@ctr}{0}
\setcounter{FBcaption@count}{0}
\setcounter{ContinuedFloat}{0}
\setcounter{KVtest}{0}
\setcounter{subfigure}{0}
\setcounter{subfigure@save}{0}
\setcounter{lofdepth}{1}
\setcounter{subtable}{0}
\setcounter{subtable@save}{0}
\setcounter{lotdepth}{1}
\setcounter{lips@count}{2}
\setcounter{lstnumber}{1}
\setcounter{Item}{0}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{0}
\setcounter{AM@survey}{0}
\setcounter{ttlp@side}{0}
\setcounter{myparts}{0}
\setcounter{parentequation}{0}
\setcounter{AlgoLine}{0}
\setcounter{algocfline}{0}
\setcounter{algocfproc}{0}
\setcounter{algocf}{0}
\setcounter{float@type}{8}
\setcounter{nlinenum}{0}
\setcounter{lstlisting}{0}
\setcounter{section@level}{0}
}

Event Timeline