Page MenuHomec4science

MatrixCreatorParallel.cpp
No OneTemporary

File Metadata

Created
Wed, May 1, 13:44

MatrixCreatorParallel.cpp

#include <vector>
#include <utility> // std::pair, std::make_pair()
#include <string>
#include <unordered_map>
#include <seqan/bed_io.h> // BedFileIn
#include <seqan/bam_io.h> // BamFileIn, BamAlignmentRecord
#include <MatrixCreator.hpp>
#include <GenomeRegion.hpp>
#include <typedef.hpp>
std::pair<int, int> MatrixCreator::get_bin_indices(const GenomeRegion& target,
const std::vector<GenomeRegion>& bins)
{ // the bin range and chromosome
GenomeRegion range(bins.front().chromosome,
bins.front().chromosome_idx,
bins.front().start,
bins.back().end) ;
// no overlap
if(not (target | range))
{ return std::make_pair(0,0) ; }
// overlap
else
{ // target goes over all bins
if(target.start <= range.start and
target.end >= range.end)
{ return std::make_pair(0, bins.size()) ; }
// partial overlap
else
{ int bin_start = -1 ;
int bin_end = -1 ;
int bin_size = bins.front().end - bins.front().start ;
// start
if(target.start <= range.start)
{ bin_start = 0 ; }
else
{ bin_start = (target.start - range.start) / bin_size ; }
// end
if(target.end >= range.end)
{ bin_end = bins.size() ; }
else
{ bin_end = ((target.end - 1 - range.start) / bin_size) + 1 ; }
return std::make_pair(bin_start, bin_end) ;
}
}
}
bool MatrixCreator::is_good_read(const seqan::BamAlignmentRecord& record)
{
if(seqan::hasFlagUnmapped(record) or // read unmapped flag
seqan::hasFlagQCNoPass(record) or // not passing QC flag
seqan::hasFlagDuplicate(record)) // PCR duplicate flag
{ return false ; }
return true ;
}
bool MatrixCreator::is_good_pair(const seqan::BamAlignmentRecord& record)
{
if((not seqan::hasFlagMultiple(record)) or // is paired flag
(not seqan::hasFlagAllProper(record))) // each read properly aligned flag
{ return false ; }
if((not seqan::hasFlagFirst(record)) or // read 1st in pair flag
seqan::hasFlagLast(record)) // mate 1st in pair flag
{ return false ; }
// read info
bool read_is_rev = seqan::hasFlagRC(record) ; // read is rev flag
int read_start = record.beginPos ;
// mate info
bool mate_is_rev = seqan::hasFlagNextRC(record) ; // mate is rev flag
int mate_start = record.pNext ;
// qc
if((not this->is_good_read(record)) or
// --> -->
(not read_is_rev and not mate_is_rev) or
// <-- <--
(read_is_rev and mate_is_rev) or
// <-- --> 1/2
((read_is_rev and not mate_is_rev) and (read_start < mate_start)) or
// <-- --> 2/2
((not read_is_rev and mate_is_rev) and (read_start > mate_start)))
{ return false ; }
return true ;
}
MatrixCreator::MatrixCreator(const std::string& bed_file_path,
const std::string& bam_file_path,
const std::string& bai_file_path,
int from,
int to,
int bin_size,
MatrixCreator::methods method,
size_t n_threads)
: from(from), to(to), bin_size(bin_size),
method(method),
relative_bin_coord(),
bed_path(bed_file_path),
bam_path(bam_file_path),
bai_path(bai_file_path),
bed_files(n_threads),
bam_files(n_threads),
bai_files(n_threads),
chrom_map_names(),
matrix_counts(),
matrix_bins(),
n_threads(n_threads),
threads(n_threads)
{ if(this->method != MatrixCreator::methods::FRAGMENT and
this->method != MatrixCreator::methods::FRAGMENT_CENTER and
this->method != MatrixCreator::methods::READ and
this->method != MatrixCreator::methods::READ_ATAC)
{ throw std::invalid_argument("Error! Unrecognized method!") ; }
}
/* Initialize Histogram (table) */
/* The windows or bins are placed such that one window will be
centered at pos 0 (odd window size), -0.5 even (window size).
The whole range [$from,$to] will be shortened to an integer
number of window sizes.
Example: $from = -20, $to = 20, $ win =5;
Windows: [-17,-13], [-12,-8], [-7,-3], [-2,2], [3,7], [8,12], [13,17]
New range: $from = -17, $to =17
*/
void MatrixCreator::compute_relative_bin_coord()
{
int l5_p = 0 ;
int l3_p = 0 ;
/* begin (xb), end (xe), and center position (xe) of window near 0 */
int xb = -this->bin_size/2; ;
int xe = xb + this->bin_size - 1 ;
// int xc = (xb + xe)/2 ; // unused
if (this->from > xb)
{ l5_p = (this->from - xb)/this->bin_size + 1 ; }
else
{ l5_p = -(xb - this->from)/this->bin_size ; }
if (this->to >= xe)
{ l3_p = (this->to - xe)/this->bin_size ; }
else
{ l3_p = -(xe - this->to)/this->bin_size + 1 ; }
/* New range */
this->from = xb + l5_p * this->bin_size;
this->to = xe + l3_p * this->bin_size;
// contains the bin coordinate limits [from,to)
// from is the 1st position within the bin and to the
// first position after the bin.
size_t n_bin = ((this->to-this->from)/this->bin_size) + 1 ;
this->relative_bin_coord = v_pair(n_bin) ;
int inf = this->from ;
int sup = inf + this->bin_size - 1 ;
for(size_t i=0; inf<=to; inf+=this->bin_size, sup+=this->bin_size, i++)
{ this->relative_bin_coord[i] = std::make_pair(inf, sup+1) ; }
}
bool MatrixCreator::is_valid_chromosome(const seqan::BamAlignmentRecord& record,
size_t thread_idx)
{
std::string name = seqan::toCString(
seqan::getContigName(
record, this->bam_files[thread_idx])) ;
if(this->chrom_map_names.find(name) == this->chrom_map_names.end())
{ return false ; }
return true ;
}
void MatrixCreator::open_bam_files()
{ for(auto& bam_file : this->bam_files)
{ if(not seqan::open(bam_file, this->bam_path.c_str()))
{ char msg[4096] ;
sprintf(msg, "cannot open %s", this->bam_path.c_str()) ;
throw std::runtime_error(msg) ;
}
}
}
void MatrixCreator::open_bai_files()
{ for(auto& bai_file : this->bai_files)
{ if(not seqan::open(bai_file, this->bai_path.c_str()))
{ char msg[4096] ;
sprintf(msg, "cannot open %s", this->bai_path.c_str()) ;
throw std::runtime_error(msg) ;
}
}
}
void MatrixCreator::open_bed_files()
{ for(auto& bed_file : this->bed_files)
{ if(not seqan::open(bed_file, this->bed_path.c_str()))
{ char msg[4096] ;
sprintf(msg, "cannot open %s", this->bed_path.c_str()) ;
throw std::runtime_error(msg) ;
}
}
}
void MatrixCreator::close_bam_files()
{ for(auto& bam_file : this->bam_files)
{ seqan::close(bam_file) ; }
}
void MatrixCreator::close_bed_files()
{ for(auto& bed_file : this->bed_files)
{ seqan::close(bed_file) ; }
}

Event Timeline