Page MenuHomec4science

ClassCorrelationMatrixCreatorApplication.cpp
No OneTemporary

File Metadata

Created
Thu, Oct 31, 14:24

ClassCorrelationMatrixCreatorApplication.cpp

#include <ClassCorrelationMatrixCreatorApplication.hpp>
#include <CorrelationMatrixCreator.hpp>
#include <boost/program_options.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <stdexcept> // std::invalid_argument
#include <Matrix4D.hpp>
namespace po = boost::program_options ;
// the valid values for --method option
std::string method_read = "read" ;
std::string method_read_atac = "read_atac" ;
std::string method_fragment = "fragment" ;
std::string method_fragment_center = "fragment_center" ;
ClassCorrelationMatrixCreatorApplication::ClassCorrelationMatrixCreatorApplication(int argn, char** argv)
: file_bed(""), file_bam(""), file_bai(""), file_prob(""),
from(0), to(0), bin_size(0), class_k(0),
method(CorrelationMatrixCreator::FRAGMENT), runnable(true)
{
// parse command line options and set the fields
this->parseOptions(argn, argv) ;
}
int ClassCorrelationMatrixCreatorApplication::run()
{ if(this->runnable)
{
// load posterior prob
Matrix4D<double> prob(this->file_prob) ;
// prob.load(this->file_prob) ;
size_t n_row = prob.get_dim()[0] ;
size_t n_class = prob.get_dim()[1] ;
size_t n_shift = prob.get_dim()[2] ;
size_t n_flip = prob.get_dim()[3] ;
bool flip = n_flip == 2 ;
// compute class prob
std::vector<double> prob_colsum(n_class, 0.) ;
double tot = 0. ;
for(size_t i=0; i<n_row; i++)
{ for(size_t j=0; j<n_class; j++)
{ for(size_t k=0; k<n_shift; k++)
{ for(size_t l=0; l<n_flip; l++)
{ double p = prob(i,j,k,l) ;
prob_colsum[j] += p ;
tot += p ;
}
}
}
}
for(auto& p : prob_colsum)
{ p /= tot ; }
// class of interest (1 -> 0-based)
this->class_k -= 1 ;
if(this->class_k > n_class)
{ char msg[4096] ;
sprintf(msg, "Error! invalid class of interest (%zu and %zu found)",
this->class_k, n_class) ;
throw std::invalid_argument(msg) ;
}
// extend limits
int ext = n_shift - 1 ;
int ext_r = ext / 2 ;
int ext_l = ext - ext_r ;
int from2 = this->from - ext_l ;
int to2 = this->to + ext_r ;
CorrelationMatrixCreator mc(this->file_bed,
this->file_bam,
this->file_bai,
from2,
to2,
this->bin_size,
this->method) ;
Matrix2D<int> data2 = mc.create_matrix() ;
size_t n_col2 = data2.get_ncol() ;
// realign matrix
size_t n_col3 = this->to - this->from + 1 ;
Matrix2D<double> data3(n_row,
n_col3,
0.) ;
for(size_t i=0; i<n_row; i++)
{ for(size_t s=0; s<n_shift; s++)
{ // --------------- forward ---------------
int from_dat2_fw = s ;
int to_dat2_fw = from_dat2_fw + n_col3 - 1 ;
for(int j_dat2_fw=from_dat2_fw, j_dat3_fw=0;
j_dat2_fw<=to_dat2_fw;
j_dat2_fw++, j_dat3_fw++)
{ data3(i,j_dat3_fw) +=
(prob(i,class_k,s,0) *
data2(i,j_dat2_fw)) /
prob_colsum[class_k] ;
}
// --------------- reverse ---------------
if(flip)
{ int from_dat2_rev = n_col2 - 1 - s ;
int to_dat2_rev = from_dat2_rev - (n_col3 - 1) ;
for(int j_dat2_rev=from_dat2_rev, j_dat3_fw=0;
j_dat2_rev >= to_dat2_rev;
j_dat2_rev--, j_dat3_fw++)
{ data3(i,j_dat3_fw) +=
(prob(i,class_k,s,1) *
data2(i,j_dat2_rev)) /
prob_colsum[class_k] ;
}
}
}
}
// clean memory
prob = Matrix4D<double>() ;
// convert to integer
Matrix2D<int> data4(data3.get_nrow(),
data3.get_ncol()) ;
for(size_t i=0; i< data4.get_nrow(); i++)
{ for(size_t j=0; j<data4.get_ncol(); j++)
{ data4(i,j) = (int)data3(i,j) ; }
}
// display integer matrix
std::cout << data4 << std::endl ;
return EXIT_SUCCESS ;
}
else
{ return EXIT_FAILURE ; }
}
void ClassCorrelationMatrixCreatorApplication::parseOptions(int argn, char** argv)
{
// no option to parse
if(argv == nullptr)
{ std::string message = "no options to parse!" ;
throw std::invalid_argument(message) ;
}
// help messages
std::string desc_msg = "\n"
"ClassCorrelationMatrixCreator is an application that retro-compute\n"
"a fictive count matrix of a given class K, from a partition, assuming\n"
"that the classification is true.\n"
"To construct the final matrix M3, the original matrix M1 that was\n"
"partitioned is computed and extended into a matrix M2. The original\n"
"matrix M1 has L1 column and was partitioned allowing S shifting freedom.\n"
"A number of extra columns E, overall, is added to the matrix M1 (~E/2 on\n"
"each side). The final matrix M3, of L1 columns, is then computed. A row\n"
"of the final matrix M3 is the weighted average of each of the S possibles\n"
"slices of the corresponding row in M2. The weight used are th\n"
"probabilities with which this row was assigned to class K, for each of\n"
"the S slices." ;
std::string opt_help_msg = "Produces this help message." ;
std::string opt_bed_msg = "The path to the BED file containing the references.";
std::string opt_bam_msg = "The path to the BAM file containing the targets.";
std::string opt_bai_msg = "The path to the BAI file containing the BAM file index.";
std::string opt_prob_msg = "The path to the file containing the assignment probabilities\n"
"(the partition)." ;
std::string opt_from_msg = "The most upstream position - in relative coordinate - of the regions\n"
"in the original matrix." ;
std::string opt_to_msg = "The most downstream position - in relative coordinate - of the regions\n"
"in the original matrix." ;
std::string opt_classk_msg = "The index (1-based) of the class of interest." ;
std::string opt_binsize_msg = "The size of the bins, in the original matrix." ;
char tmp[4096] ;
sprintf(tmp,
"How the data in the BAM file should be handled when computing\n"
"the number of counts in each bin.\n"
"\t\"%s\" uses each position within the reads (by default)\n"
"\t\"%s\" uses only the insertion site for ATAC-seq data\n"
"\t\"%s\" uses each position within the fragments\n"
"\t\"%s\" uses only the fragment central positions\n",
method_read.c_str(),
method_read_atac.c_str(),
method_fragment.c_str(),
method_fragment_center.c_str()) ;
std::string opt_method_msg = tmp ;
// option parser
boost::program_options::variables_map vm ;
boost::program_options::options_description desc(desc_msg) ;
std::string method(method_read) ;
desc.add_options()
("help,h", opt_help_msg.c_str())
("bed", po::value<std::string>(&(this->file_bed)), opt_bed_msg.c_str())
("bam", po::value<std::string>(&(this->file_bam)), opt_bam_msg.c_str())
("bai", po::value<std::string>(&(this->file_bai)), opt_bai_msg.c_str())
("prob", po::value<std::string>(&(this->file_prob)), opt_bai_msg.c_str())
("from", po::value<int>(&(this->from)), opt_from_msg.c_str())
("to", po::value<int>(&(this->to)), opt_to_msg.c_str())
("binSize", po::value<int>(&(this->bin_size)), opt_binsize_msg.c_str())
("k", po::value<size_t>(&(this->class_k)), opt_classk_msg.c_str())
("method", po::value<std::string>(&(method)), opt_method_msg.c_str()) ;
// parse
try
{ po::store(po::parse_command_line(argn, argv, desc), vm) ;
po::notify(vm) ;
}
catch(std::invalid_argument& e)
{ std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ;
throw std::invalid_argument(msg) ;
}
catch(...)
{ throw std::invalid_argument("An unknown error occured while parsing the options") ; }
bool help = vm.count("help") ;
// checks unproper option settings
if(this->file_bed == "" and (not help))
{ std::string msg("Error! No BED file was given (--bed)!") ;
throw std::invalid_argument(msg) ;
}
else if(this->file_bam == "" and (not help))
{ std::string msg("Error! No BAM file was given (--bam)!") ;
throw std::invalid_argument(msg) ;
}
else if(this->file_bai == "" and (not help))
{ std::string msg("Error! No BAM index file was given (--bai)!") ;
throw std::invalid_argument(msg) ;
}
else if(this->file_prob == "" and (not help))
{ std::string msg("Error! No probability (partition) file was given (--prob)!") ;
throw std::invalid_argument(msg) ;
}
else if(this->from == 0 and this->to == 0 and (not help))
{ std::string msg("Error! No range given (--from and --to)!") ;
throw std::invalid_argument(msg) ;
}
else if(this->from >= this->to and (not help))
{ std::string msg("Error! from shoud be smaller than to (--from and --to)!") ;
throw std::invalid_argument(msg) ;
}
else if(this->class_k == 0 and (not help))
{ std::string msg("Error! no class of interest was given (--k)!") ;
throw std::invalid_argument(msg) ;
}
else if(this->bin_size <= 0 and (not help))
{ std::string msg("Error! bin size should be bigger than 0 (--binSize)!") ;
throw std::invalid_argument(msg) ;
}
else if(method != method_read and
method != method_read_atac and
method != method_fragment and
method != method_fragment_center)
{ char msg[4096] ;
sprintf(msg, "Error! method should be %s, %s, %s or %s (--method)",
method_read.c_str(),
method_read_atac.c_str(),
method_fragment.c_str(),
method_fragment_center.c_str()) ;
throw std::invalid_argument(msg) ;
}
// set method
if(method == method_read)
{ this->method = CorrelationMatrixCreator::READ ; }
else if(method == method_read_atac)
{ this->method = CorrelationMatrixCreator::READ_ATAC ; }
else if(method == method_fragment)
{ this->method = CorrelationMatrixCreator::FRAGMENT ; }
else if(method == method_fragment_center)
{ this->method = CorrelationMatrixCreator::FRAGMENT_CENTER ; }
// help invoked, run() cannot be invoked
if(help)
{ std::cout << desc << std::endl ;
this->runnable = false ;
return ;
}
// everything fine, run() can be called
else
{ this->runnable = true ;
return ;
}
}
int main(int argn, char** argv)
{ ClassCorrelationMatrixCreatorApplication app(argn, argv) ;
return app.run() ;
}

Event Timeline