Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F61649320
dna_utility.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, May 8, 02:15
Size
4 KB
Mime Type
text/x-c
Expires
Fri, May 10, 02:15 (2 d)
Engine
blob
Format
Raw Data
Handle
17540390
Attached To
R8820 scATAC-seq
dna_utility.cpp
View Options
#include<dna_utility.hpp>
#include <string>
#include <unordered_map>
#include <stdexcept> // std::invalid_argument
#include <Matrix2D.hpp>
#include <Matrix3D.hpp>
#include <seqan/seq_io.h> // seqan::SeqFileIn
int dna::map(char base, bool rev_compl)
{
static bool init = false ;
static std::unordered_map<char,int> hash_map ;
static std::unordered_map<char,int> hash_map_rev ;
if(not init)
{ hash_map['A'] = 0 ;
hash_map['a'] = 0 ;
hash_map['C'] = 1 ;
hash_map['c'] = 1 ;
hash_map['G'] = 2 ;
hash_map['g'] = 2 ;
hash_map['T'] = 3 ;
hash_map['t'] = 3 ;
hash_map['N'] = 4 ;
hash_map['n'] = 4 ;
hash_map_rev['A'] = hash_map['T'] ;
hash_map_rev['a'] = hash_map['t'] ;
hash_map_rev['C'] = hash_map['G'] ;
hash_map_rev['c'] = hash_map['g'] ;
hash_map_rev['G'] = hash_map['C'] ;
hash_map_rev['g'] = hash_map['c'] ;
hash_map_rev['T'] = hash_map['A'] ;
hash_map_rev['t'] = hash_map['a'] ;
hash_map_rev['N'] = hash_map['N'] ;
hash_map_rev['n'] = hash_map['n'] ;
init = true ;
}
try
{ if(rev_compl)
{ return hash_map_rev.at(base) ; }
else
{ return hash_map.at(base) ; }
}
// key could not be found
catch(std::out_of_range& e)
{ char msg[256] ;
sprintf(msg, "Error! Invalid DNA base : %c", base) ;
throw std::invalid_argument(msg) ;
}
}
char dna::map(int base, bool rev_compl)
{
static bool init = false ;
static std::unordered_map<int,char> hash_map ;
static std::unordered_map<int,char> hash_map_rev ;
if(not init)
{ hash_map[0] = 'A' ;
hash_map[1] = 'C' ;
hash_map[2] = 'G' ;
hash_map[3] = 'T' ;
hash_map[4] = 'N' ;
hash_map_rev[4] = hash_map[4] ;
hash_map_rev[3] = hash_map[0] ;
hash_map_rev[2] = hash_map[1] ;
hash_map_rev[1] = hash_map[2] ;
hash_map_rev[0] = hash_map[3] ;
init = true ;
}
try
{ if(rev_compl)
{ return hash_map_rev.at(base) ; }
else
{ return hash_map.at(base) ; }
}
// key could not be found
catch(std::out_of_range& e)
{ char msg[256] ;
sprintf(msg, "Error! Invalid DNA code : %i", base) ;
throw std::invalid_argument(msg) ;
}
}
int dna::char_to_int(char c, bool rev_compl)
{ return dna::map(c, rev_compl) ; }
Matrix2D<int> dna::char_to_int(const Matrix2D<int>& matrix)
{
size_t n_row = matrix.get_nrow() ;
size_t n_col = matrix.get_ncol() ;
Matrix2D<int> data_int(n_row, n_col) ;
for(size_t i=0; i<n_row; i++)
{ for(size_t j=0; j<n_col; j++)
{ data_int(i,j) = dna::char_to_int(matrix(i,j)) ; }
}
return data_int ;
}
char dna::int_to_char(int n, bool rev_compl)
{ return dna::map(n, rev_compl) ; }
std::vector<double> dna::base_composition(const Matrix2D<int>& sequences, bool both_strands)
{
double total = 0. ;
std::vector<double> base_comp(4,0.) ;
int base_N = dna::map('N') ;
for(size_t i=0; i<sequences.get_nrow(); i++)
{ for(size_t j=0; j<sequences.get_ncol(); j++)
{ // forward strand
int base = sequences(i,j) ;
// do not account for N's
if(base == base_N)
{ continue ; }
else
{ base_comp[base] += 1;
total += 1. ;
}
// reverse complement strand
if(both_strands)
{ // size_t c_hash_rev = dna::hash(c, true) ;
base_comp[4-base-1] += 1. ;
total += 1. ;
}
}
}
// normalize
for(auto& i : base_comp)
{ i /= total ; }
return base_comp ;
}
std::vector<double> dna::base_composition(const Matrix3D<double>& consensus_sequences, bool both_strands)
{
if(consensus_sequences.get_dim()[2] != 4)
{ char msg[4096] ;
sprintf(msg, "Error! consensus sequences 3rd dimension not equal to 4 (%zu)",
consensus_sequences.get_dim()[2]) ;
throw std::invalid_argument(msg) ;
}
double total = 0. ;
std::vector<double> base_comp(4,0.) ;
for(size_t i=0; i<consensus_sequences.get_dim()[0]; i++)
{ for(size_t j=0; j<consensus_sequences.get_dim()[1]; j++)
{ for(size_t k=0; k<4; k++)
{ // forward strand
{ base_comp[k] += consensus_sequences(i,j,k) ;
total += consensus_sequences(i,j,k) ;
}
// revers strand
if(both_strands)
{ size_t k_comp = 4 - k - 1 ;
base_comp[k_comp] += consensus_sequences(i,j,k) ;
total += consensus_sequences(i,j,k) ;
}
}
}
}
// normalize
for(auto& i : base_comp)
{ i /= total ; }
return base_comp ;
}
Event Timeline
Log In to Comment