diff --git a/src/main_test.cpp b/src/main_test.cpp deleted file mode 100644 index 5a9e265..0000000 --- a/src/main_test.cpp +++ /dev/null @@ -1,404 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -template -std::ostream& operator << (std::ostream& stream, const std::vector& v) -{ for(const auto& x : v) - { stream << x << " " ; } - return stream ; -} - -template -std::ostream& operator << (std::ostream& stream, const std::unordered_map& m) -{ for(const auto& bucket : m) - { stream << bucket.first << " " << bucket.second << std::endl ; } - return stream ; -} - - -Matrix3D compute_consensus_sequences(const Matrix2D& seq, - const Matrix4D& prob, - size_t class_k) -{ - size_t n_seq = seq.get_nrow() ; - size_t l_seq = seq.get_ncol() ; - // size_t n_class = prob.get_dim()[1] ; - size_t n_shift = prob.get_dim()[2] ; - size_t n_flip = prob.get_dim()[3] ; - bool flip = n_flip == 2 ; - size_t l_model = l_seq - n_shift + 1 ; - - Matrix3D cons_seq(n_seq, l_model, 4, 0.) ; - - // aggregate - for(size_t i=0; i same as doing nothing - if(base == dna::char_to_int('N')) - { continue ; } - int base_rv = 4 - base - 1 ; - // --------- forward --------- - // std::cerr << "cons_seq(" << i << "," << j << "," << base << ") += prob(" << i << "," << class_k << "," << 0 << std::endl ; - cons_seq(i,j,base) += prob(i,class_k,s,0) ; - tot += prob(i,class_k,s,0) ; - - // --------- reverse --------- - if(flip) - { // std::cerr << "cons_seq(" << i << "," << j << "," << base_rv << ") += prob(" << i << "," << class_k << "," << 1 << std::endl ; - cons_seq(i,j,base_rv) += prob(i,class_k,s,1) ; - tot += prob(i,class_k,s,1) ; - } - } - } - } - - // normalize - for(size_t i=0; i char_to_int(const Matrix2D& seq) -{ - size_t n_row = seq.get_nrow() ; - size_t n_col = seq.get_ncol() ; - - Matrix2D iseq(n_row, n_col, 0.) ; - - for(size_t i=0; i sequence_to_consensus(const Matrix2D& seq) -{ - size_t n_row = seq.get_nrow() ; - size_t n_col = seq.get_ncol() ; - size_t n_dim3 = 4 ; // A, C, G, T - - int base_n = dna::char_to_int('N') ; - - Matrix3D cseq(n_row, n_col, n_dim3, 1e-100) ; - - - double prob = 1. - 3*(1e-100) ; // p(base) - p(!base) - - for(size_t i=0; i& cons_seq, - size_t row, - size_t start, - const Matrix2D& model) -{ size_t ncol = cons_seq.get_dim()[1] ; - // size_t dim1 = cons_seq.get_dim()[0] ; - // size_t dim2 = cons_seq.get_dim()[1] ; - size_t dim3 = cons_seq.get_dim()[2] ; - - if(start > ncol - model.get_nrow()) - { char msg[4096] ; - sprintf(msg, "Error! given start (%zu) is too high. Max value is %zu", - start, ncol - model.get_nrow()) ; - throw std::invalid_argument(msg) ; - } - else if(model.get_nrow() > ncol) - { char msg[4096] ; - sprintf(msg, "Error! given model is longer than sequences (%zu / %zu)", - model.get_nrow(), ncol) ; - throw std::invalid_argument(msg) ; - } - else if(model.get_ncol() != 4) - { char msg[4096] ; - sprintf(msg, "Error! given model 2nd dimension is not 4 (%zu)", - model.get_ncol()) ; - throw std::invalid_argument(msg) ; - } - else if(dim3 != 4) - { char msg[4096] ; - sprintf(msg, "Error! given data 3rd dimension is not 4 (%zu)", - dim3) ; - throw std::invalid_argument(msg) ; - } - - /* - - double ll = 0 ; - for(size_t j_seq=from, j_mod=0; j_seq test_consensus(const std::string& file, - size_t n_class, - size_t n_iter, - size_t n_shift, - bool flip, - bool bckg_class, - const std::string& seed, - size_t n_thread) -{ size_t n_flip = flip + 1 ; - - Matrix2D seq(file) ; - // Matrix2D seq(char_to_int(Matrix2D(file))) ; - Matrix3D cseq = sequence_to_consensus(seq) ; - - // partition - EMConsensusSequence em(cseq, - n_class, - n_iter, - n_shift, - flip, - bckg_class, - seed, - n_thread) ; - em.classify() ; - - // get post prob - Matrix4D prob = em.get_post_prob() ; - - // compute models - ConsensusSequenceModelComputer mc(std::move(cseq), - prob, - bckg_class, - n_thread) ; - - Matrix2D model = mc.get_model() ; - - // compute the class prob - size_t n_row = prob.get_dim()[0] ; - std::vector class_prob(n_class, 0.) ; - double p_tot = 0. ; - for(size_t i=0; i model_final(model.get_nrow(), - model.get_ncol() + 1) ; - size_t i_class = 0 ; - for(size_t i=0; i test_sequence(const std::string& file, - size_t n_class, - size_t n_iter, - size_t n_shift, - bool flip, - bool bckg_class, - const std::string& seed, - size_t n_thread) -{ size_t n_flip = flip + 1 ; - - Matrix2D seq(file) ; - // Matrix2D seq(char_to_int(Matrix2D(file))) ; - - // partition - EMSequence em(seq, - n_class, - n_iter, - n_shift, - flip, - bckg_class, - seed, - n_thread) ; - em.classify() ; - - // get post prob - Matrix4D prob = em.get_post_prob() ; - - // compute models - SequenceModelComputer mc(std::move(seq), - prob, - bckg_class, - n_thread) ; - - Matrix2D model = mc.get_model() ; - - // compute the class prob - size_t n_row = prob.get_dim()[0] ; - std::vector class_prob(n_class, 0.) ; - double p_tot = 0. ; - for(size_t i=0; i model_final(model.get_nrow(), - model.get_ncol() + 1) ; - size_t i_class = 0 ; - for(size_t i=0; i(file)).save("sp1_motifs_10e-7_consensussequences.mat3D") ; - - /* - size_t n_class = 2 ; - size_t n_iter = 50 ; - size_t n_shift = 1 ; - bool flip = true ; - bool bckg_class = false ; - std::string seed = "" ; - size_t n_thread = 1 ; - - Matrix2D model_final = test_consensus(file, - n_class, - n_iter, - n_shift, - flip, - bckg_class, - seed, - n_thread) ; - */ - - /* - Matrix2D model_final = test_sequence(file, - n_class, - n_iter, - n_shift, - flip, - bckg_class, - seed, - n_thread) ; - */ - - - /* - Matrix2D model_final = test_aggregate(file) ; - */ - - // std::cout << model_final << std::endl ; - - return EXIT_SUCCESS ; -}