diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a4d0210 --- /dev/null +++ b/.gitignore @@ -0,0 +1,43 @@ +# This file is used to ignore files which are generated +# ---------------------------------------------------------------------------- + +*~ +*.autosave +*.a +*.core +*.moc +*.o +*.obj +*.orig +*.rej +*.so +*.so.* +*_pch.h.cpp +*_resource.rc +*.qm +.#* +*.*# +core +!core/ +tags +.DS_Store +.directory +*.debug +Makefile* +*.prl +*.app +moc_*.cpp +ui_*.h +qrc_*.cpp +Thumbs.db +*.res +*.rc +/.qmake.cache +/.qmake.stash +bin/ +CMakeFiles/ +data/ +results/ +CMakeCache.txt +cmake_install.cmake + diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..7f1eca7 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,37 @@ +# project +project(scATACseq) +cmake_minimum_required(VERSION 3.10) + +# static libraries +## boost library +set(BOOST_INCLUDEDIR "/usr/local/include/boost/") +set(BOOST_LIBRARYDIR "/usr/local/lib/boost") +find_package(Boost 1.65 COMPONENTS program_options REQUIRED) + +## UnitTest++ library +## TODO write a FindUnitTest++.cmake file to use find_package() +find_library(UNITTEST_LIB + NAMES "UnitTest++" + PATHS "/usr/local/lib/UnitTest++") +find_path(UNITTEST_INCLUDE + NAMES "UnitTest++.h" + PATHS "/usr/local/include/UnitTest++/") +include_directories(${UNITTEST_INCLUDE}) +# link_directories(${UNITTEST_LIB}) + +## threads +find_package(Threads REQUIRED) + + +# compiler options +add_compile_options(-std=c++11) +add_compile_options(-O3) +add_compile_options(-Wall) +add_compile_options(-Wextra) +add_compile_options(-Werror) +add_compile_options(-Wfatal-errors) +add_compile_options(-pedantic) + + +add_subdirectory(src) + diff --git a/scripts/bulk_sequencing/analysis_cluster_ctcf_mnase_k562.R b/scripts/bulk_sequencing/analysis_cluster_ctcf_mnase_k562.R new file mode 100755 index 0000000..bb60a9b --- /dev/null +++ b/scripts/bulk_sequencing/analysis_cluster_ctcf_mnase_k562.R @@ -0,0 +1,197 @@ + + +# functions + +#' Compute the euclidean distance between two references. +#' It also check if a reference is in reverse orientation +#' and returns the smallest distance value. +#' \param ref1 a vector containing the first reference. +#' \param ref2 a vector containing the second reference. +#' \return the euclidean distance. +eucl.dist.ref = function(ref1, ref2) +{ + return(min(sqrt(sum(((ref1 - ref2 ) ^ 2))), + sqrt(sum(((ref1 - rev(ref2)) ^ 2))))) +} + + +#' Compute the correlation distance between two references. +#' It also check if a reference is in reverse orientation +#' and returns the smallest distance value. +#' \param ref1 a vector containing the first reference. +#' \param ref2 a vector containing the second reference. +#' \return the euclidean distance. +cor.dist.ref= function(ref1, ref2) +{ + return(1 - min(cor(ref1, ref2 ), + cor(ref1, rev(ref2)))) +} + + +#' Computes the distance matrix, using the euclidean distance, for all +#' the references aggregations given. As some references may be in reverse +#' orientation compared to others, the distance in both orientation is +#' computed, for each pair, and the best is returned. +distance.ref = function(references) +{ n = nrow(references) + d = matrix(nrow=n, ncol=n, data=0) + + for(i in 1:n) + { for(j in 1:i) + { x = eucl.dist.ref(references[i,], references[j,]) + d[i,j] = x + d[j,i] = x + } + } + return(d) +} + + +get_matches = function(distances, run_value) +{ + matches = matrix(nrow=0, ncol=4) + + # references of run i on the row -> y coord + # references of run j on the col -> x coord + + # run labels + run_i = 1 + # run_j = 2 + + for(run_j in setdiff(unique(run_value), run_i)) + { + # number of references in each run + n_i = length(which(run_value == run_i)) + n_j = length(which(run_value == run_j)) + + index_i = which(run_value == run_i) # rows of run i + index_j = which(run_value == run_j) # columns of run j + + i_taken = c() # classes of i already plotted -> rows to ignore + j_taken = c() # classes of j already plotted -> columns to ignore + + # while not all classes in j have been plotted + row_n = 1 + while(length(j_taken) < n_j) + { if(length(i_taken) == 0 && + length(j_taken) == 0) + { distances_tmp = distances[index_i, index_j] + coord = which(distances_tmp == min(distances_tmp), arr.ind=T) + coord_i = as.numeric(rownames(distances_tmp)[coord[1]]) + coord_j = as.numeric(colnames(distances_tmp)[coord[2]]) + coord = c(coord_i, coord_j) + } else { + rows = setdiff(index_i, i_taken) + cols = setdiff(index_j, j_taken) + distances_tmp = distances[rows, cols, drop=F] + coord = which(distances_tmp == min(distances_tmp), arr.ind=T) + coord_i = as.numeric(rownames(distances_tmp)[coord[1]]) + coord_j = as.numeric(colnames(distances_tmp)[coord[2]]) + coord = c(coord_i, coord_j) + } + coord = c(coord, row_n, run_j) + i_taken = c(i_taken, coord[1]) + j_taken = c(j_taken, coord[2]) + matches = rbind(matches, coord) + row_n = row_n + 1 + } + } + return(matches) +} + + +plot.references = function(references, distances, n_run, run_value, n_class_max) +{ + colors = brewer.pal(6, "Set1") + + # compute the best matches between all references to 1st run references + matches = get_matches(distances, run_value) + + # make a matrix for layout with good plot numbers + plots.lab = matrix(nrow=n_class_max, ncol=n_run) + plots.lab[,1] = 1:n_class_max # for run with max number of classes + z = n_class_max + 1 + for(i in 1:nrow(matches)) + { coord = matches[i,] + # plots.lab[coord[3], coord[4]] = z + plots.lab[coord[1], coord[4]] = z + z = z + 1 + } + # these will be the empty plots + for(i in 1:nrow(plots.lab)) + { for(j in 1:ncol(plots.lab)) + { if(is.na(plots.lab[i,j])) + { plots.lab[i,j] = z + z = z + 1 + } + } + } + + # plot + X11(height=12, width=10) + # a grid + m = layout(mat = plots.lab) + # layout.show(m) + x = 1:ncol(references) + + # plot run 1 references + for(i in 1:n_class_max) + { plot(x=x, y=references[i,], lwd=3, type='l', col=colors[i], main="", xlab="pos [bp]", ylab="Nb reads") } + + # plot others + for(i in 1:nrow(matches)) + { ref_index = matches[i,2] + col_index = matches[i,3] + plot(x=x, y=references[ref_index,], lwd=3, type='l', col=colors[col_index], main="", xlab="pos [bp]", ylab="Nb reads") + } +} + + + + + +library(RColorBrewer) + +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +data.2 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_2class.mat"))) +data.3 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_3class.mat"))) +data.4 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_4class.mat"))) +data.5 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_5class.mat"))) +data.6 = as.matrix(read.table(file.path("results", "bulk_sequencing", "ctcf_dnase_k562_6class.mat"))) +data = list(data.6, data.5, data.4, data.3, data.2) + +# some colors +colors = brewer.pal(6, "Set1") + +# number of runs +n_run = length(data) +# number of different classes overall +n_class_tot = sum(unlist(lapply(data, nrow))) +# max value of K +n_class_max = max(unlist(lapply(data, nrow))) + +# construct a matrix with all discovered references on the rows +references = matrix(nrow=n_class_tot, ncol=ncol(data[[1]])) +run_value = vector(length=n_class_tot) +k_value = vector(length=n_class_tot) +k = 1 +for(i in 1:n_run) +{ for(j in 1:nrow(data[[i]])) + { references[k,] = data[[i]][j,] + run_value[k] = i + k_value[k] = j + k = k + 1 + } +} + +# distance matrix between all references +distances = distance.ref(references) +rownames(distances) = 1:nrow(distances) +colnames(distances) = 1:ncol(distances) + + + +plot.references(references, distances, n_run, run_value, n_class_max) +savePlot("tmp_dnase.png") + diff --git a/scripts/bulk_sequencing/cluster_ctcf_dnase_k562.sh b/scripts/bulk_sequencing/cluster_ctcf_dnase_k562.sh new file mode 100755 index 0000000..89cad26 --- /dev/null +++ b/scripts/bulk_sequencing/cluster_ctcf_dnase_k562.sh @@ -0,0 +1,19 @@ + +results_dir='results/bulk_sequencing' +data_dir='data/bulk_sequencing/' + +mkdir -p $results_dir + +file_mnase=$data_dir'/ctcf_dnase_k562.mat' +n_iter='20' +n_shift='21' +seed='12345678' +seeding='random' + +for k in 2 3 4 5 6 +do + file_prob=$results_dir/'ctcf_dnase_k562_'$k'class_prob.mat4d' + file_ref=$results_dir/'ctcf_dnase_k562_'$k'class_ref.mat' + bin/ChIPPartitioning --data $file_mnase --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed -p 4 > $file_prob + bin/probToRef --data $file_mnase --prob $file_prob > $file_ref +done diff --git a/scripts/bulk_sequencing/cluster_ctcf_mnase_k562.sh b/scripts/bulk_sequencing/cluster_ctcf_mnase_k562.sh new file mode 100755 index 0000000..421bbaa --- /dev/null +++ b/scripts/bulk_sequencing/cluster_ctcf_mnase_k562.sh @@ -0,0 +1,19 @@ + +results_dir='results/bulk_sequencing' +data_dir='data/bulk_sequencing/' + +mkdir -p $results_dir + +file_mnase=$data_dir'/ctcf_mnase_k562.mat' +n_iter='20' +n_shift='21' +seed='12345678' +seeding='random' + +for k in 2 3 4 5 6 +do + file_prob=$results_dir/'ctcf_mnase_k562_'$k'class_prob.mat4d' + file_ref=$results_dir/'ctcf_mnase_k562_'$k'class_ref.mat' + bin/ChIPPartitioning --data $file_mnase --class $k --shift $n_shift --flip --iter $n_iter --seeding $seeding --seed $seed -p 4 > $file_prob + bin/probToRef --data $file_mnase --prob $file_prob > $file_ref +done diff --git a/scripts/bulk_sequencing/prepare_data.R b/scripts/bulk_sequencing/prepare_data.R new file mode 100755 index 0000000..ad4b79a --- /dev/null +++ b/scripts/bulk_sequencing/prepare_data.R @@ -0,0 +1,13 @@ + +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +dnase1 = as.matrix(read.table(file.path("data", "bulk_sequencing", "ctcf_dnase_k562_rep1.mat"))) +dnase2 = as.matrix(read.table(file.path("data", "bulk_sequencing", "ctcf_dnase_k562_rep1.mat"))) +dnase3 = as.matrix(read.table(file.path("data", "bulk_sequencing", "ctcf_dnase_k562_rep3.mat"))) + +# sum everything to increase coverage +dnase = dnase1 + dnase2 + dnase3 + +# write the new tables +write.table(dnase, file=file.path("data", "bulk_sequencing", "ctcf_dnase_k562.mat"), col.names=F, row.names=F, quote=F, eol='\n', sep='\t') + diff --git a/scripts/bulk_sequencing/run_all.sh b/scripts/bulk_sequencing/run_all.sh new file mode 100755 index 0000000..3eb4d2e --- /dev/null +++ b/scripts/bulk_sequencing/run_all.sh @@ -0,0 +1,7 @@ +# classify DNaseI data +Rscript scripts/bulk_sequencing/prepare_data.R +scripts/bulk_sequencing/cluster_ctcf_dnase_k562.sh + +# classify MNase data +scripts/bulk_sequencing/cluster_ctcf_mnase_k562.sh + diff --git a/scripts/em.R b/scripts/em.R new file mode 100644 index 0000000..6d1317e --- /dev/null +++ b/scripts/em.R @@ -0,0 +1,118 @@ +seeding_toy = function(data, k, shift) +{ l_slice = ncol(data) - shift + 1 + references = matrix(nrow=k, ncol=l_slice) + for(i in 1:k) + { from = floor(shift / 2) + 1 + to = from + l_slice - 1 + references[i,] = data[i, from:to] + } + return(references) +} + +seeding_random = function(data, n_class, shift) +{ l_slice = ncol(data) - shift + 1 + n_row = nrow(data) + p = matrix(nrow=n_row, ncol=n_class) + for(m in 1:n_class) + { p[,m] = rbeta(n_row, n_row**-0.5, 1) } + c = (t(p) %*% data) / colSums(p) + + p = matrix(nrow=n_row, ncol=n_class) + for(i in 1:n_class) + { p[,i] = rbeta(n_row, n_row**-0.5, 1) } + from = floor(shift/2)+1 + to = from + l_slice - 1 + c = (t(p) %*% data[,from:to]) / colSums(p) + return(c) +} + +em_shape_shift_flip = function(c,q,data) { + + K=dim(c)[1]; + L=dim(c)[2]; + N=dim(data)[1]; + S=dim(q)[2] + l=array(dim=c(N,K,S,2)); + p=array(dim=c(N,K,S,2)) + for(i in 1:K) + { c[i,]=c[i,]/mean(c[i,]) } + rm=matrix(nrow=N, ncol=S) + for(k in 1:S) + { rm[,k] = rowMeans(data[,k:(k+L-1)]) } + for(i in 1:N) + { for (j in 1:K) + { for (k in 1:S) + { l[i,j,k,1]=sum(dpois(data[i,k:(k+L-1)],c[j,]*rm[i,k],log=T)) + l[i,j,k,2]=sum(dpois(rev(data[i,])[k:(k+L-1)],c[j,]*rm[i,S-k+1],log=T)) + # xxx = c(k:(k+L-1)) + # print(c(xxx[1],xxx[length(xxx)])-1) + # for(m in 1:length(xxx)) + # { n = xxx[m] + # print(sprintf("%d %d %d %d: dpois(%d, %f*%f)", i-1, j-1, k-1, n-1, data[i,n], c[j,m], rm[i,k])) + # } + # for(m in 1:length(xxx)) + # { n = xxx[m] + # print(sprintf("%d %d %d %d: dpois(%d, %f*%f)", i-1, j-1, k-1, n-1, rev(data[i,])[n], c[j,m], rm[i,S-k+1])) + # } + } + } + } + # print(" : LOGLIKELIHOOD") + # print(l) + # print(" : LOGLIKELIHOOD MAX") + # print(apply(l, 1, max)) + p_row_sum = vector(length=N) + for(i in 1:N) + { p[i,,,] = q*exp(l[i,,,]-max(l[i,,,])) + p_row_sum[i] = sum(p[i,,,]) + p[i,,,] = p[i,,,]/sum(p[i,,,]) + } + # print(" : POST PROB ROW") + # print(p_row_sum) + # print(" : POST PROB") + # print(p) + + q = apply(p, c(2,3,4), mean) + + c = 0; + for(k in 1:S) + { c = c + (t(p[,,k,1]) %*% data[,k:(k+L-1)]) + + (t(p[,,k,2]) %*% t(apply(data[,(S-k+1):(S+L-k)],1,rev))) + } + + c = c / apply(p,2,sum) + + m= sum((1:S)*apply(q,2,sum)); + s=sum(((1:S)-m)**2*apply(q,2,sum))**0.5 + for (i in 1:K) + { q[i,,1] = q[i,,2] = sum(q[i,,]) * dnorm(1:S,floor(S/2)+1,s) / sum(dnorm(1:S,floor(S/2)+1,s)) / 2 } + + return(list(references=c, class_prob=q, post_prob=p, loglikelihood=l)) +} + +n_iter = 20 +flip = TRUE +n_shift = 21 +n_class = 4 +n_flip = flip + 1 +data = as.matrix(read.table("/local/groux/scATAC-seq/data/ctcf_mnase_encode.txt", h=F)) +# references = seeding_toy(data, n_class, n_shift) +references = seeding_random(data, n_class, n_shift) +m = apply(references, 1, mean) +references = references / m +class_prob = array(dim=c(n_class, n_shift, n_flip), 1) ; class_prob = class_prob / sum(class_prob) + +l = list(references=references, class_prob=class_prob) + +for(i in 1:n_iter) +{ # print("------------------- ITER -------------------") + l = em_shape_shift_flip(l$references, l$class_prob, data) + # print(" :LOGLIKELIHOOD") + # print(l$loglikelihood) + # print(" :POST PROB") + # print(l$post_prob) + # print(" : REFERENCES") + # print(l$references) + # print("-------------------------------------------------") +} +write.table(l$references, quote=F, row.names=F, col.names=F, sep='\t') diff --git a/scripts/plot_references.R b/scripts/plot_references.R new file mode 100644 index 0000000..1c7c10a --- /dev/null +++ b/scripts/plot_references.R @@ -0,0 +1,27 @@ +data_rand = as.matrix(read.table("mnase_random.txt", h=F)) +data_rand_new = as.matrix(read.table("mnase_random_new.txt", h=F)) +data_sampling = as.matrix(read.table("mnase_sampling.txt", h=F)) +data_rand_r = as.matrix(read.table("mnase_R.txt", h=F)) + +par(mfrow=c(2,2)) + +x = 1:ncol(data_rand_r) +plot(x, data_rand_r[1,], type='l', lwd=3, ylim=c(min(data_rand_r), max(data_rand_r)), main="R Random seeding") +for(i in 2:nrow(data_rand_r)) +{lines(x, data_rand_r[i,], lwd=3, col=i) } + +x = 1:ncol(data_rand) +plot(x, data_rand[1,], type='l', lwd=3, ylim=c(min(data_rand), max(data_rand)), main="C++ Random seeding") +for(i in 2:nrow(data_rand)) +{lines(x, data_rand[i,], lwd=3, col=i) } + +x = 1:ncol(data_rand_new) +plot(x, data_rand_new[1,], type='l', lwd=3, ylim=c(min(data_rand_new), max(data_rand_new)), main="C++ New random seeding") +for(i in 2:nrow(data_rand_new)) +{lines(x, data_rand_new[i,], lwd=3, col=i) } + +x = 1:ncol(data_sampling) +plot(x, data_sampling[1,], type='l', lwd=3, ylim=c(min(data_sampling), max(data_sampling)), main="C++ Sampling seeding") +for(i in 2:nrow(data_sampling)) +{lines(x, data_sampling[i,], lwd=3, col=i) } + diff --git a/scripts/run_all.sh b/scripts/run_all.sh new file mode 100755 index 0000000..55b002e --- /dev/null +++ b/scripts/run_all.sh @@ -0,0 +1,2 @@ +scripts/simulate_chipseq_data/run_all.sh + diff --git a/scripts/simulate_chipseq_data/run_all.sh b/scripts/simulate_chipseq_data/run_all.sh new file mode 100755 index 0000000..716b134 --- /dev/null +++ b/scripts/simulate_chipseq_data/run_all.sh @@ -0,0 +1,2 @@ +Rscript scripts/simulate_chipseq_data/simulate_data_chipseq.R + diff --git a/scripts/simulate_chipseq_data/simulate_data_chipseq.R b/scripts/simulate_chipseq_data/simulate_data_chipseq.R new file mode 100755 index 0000000..082cf0a --- /dev/null +++ b/scripts/simulate_chipseq_data/simulate_data_chipseq.R @@ -0,0 +1,133 @@ +#' \brief This function +#' +generate_data_chipseq = function(n_col, shift_max, p_flip, p_noise, coverage, shape_list, nrows_list) +{ # number of datum to generate + n_row = sum(unlist(nrows_list)) + # data structure to store results + data = matrix(0, nrow=n_row, ncol=n_col) + shifts = vector(length=n_row, mode="numeric") + flips = vector(length=n_row, mode="numeric") + classes = vector(length=n_row, mode="numeric") + shapes = matrix(0, nrow=length(shape_list), ncol=n_col) + # the proportion of reads which are signal + p_signal = 1 - p_noise + # noise : a uniform distribution + x = 1:n_col + shape_noise = dunif(x, min=min(x), max=max(x)) + + i_tot = 1 + for(k in 1:length(shape_list)) + { + l = length(shape_list[[k]]) + from_s = floor((n_col-l)/2) + to_s = from_s + l - 1 + + for(i in 1:nrows_list[[k]]) + { + # shift + shifts[i_tot] = ceiling(runif(1,1,shift_max)) + # flip + flips[i_tot] = rbinom(1, 1, p_flip) + # class + classes[i_tot] = k + from = shifts[i_tot] + to = shifts[i_tot] + length(shape_list[[k]]) - 1 + # construct shape given shift and flip + # ensure min value equal to min in shape + shape = rep(min(shape_list[[k]]), n_col) # flat, only lowest possible value + if(flips[i_tot]) + { shape[from:to] = rev(shape_list[[k]]) + tmp = from + from = to + to = tmp + } else { + shape[from:to] = shape_list[[k]] + } + shape = shape*coverage*p_signal + shape_noise*coverage*p_noise # scale to right coverage and add noise + # sample reads from shape + data[i_tot,] = rpois(n_col, shape) + shapes[k,from_s:to_s] = shapes[k,from_s:to_s] + ( data[i_tot, from:to] / nrows_list[[k]]) + + i_tot = i_tot + 1 + } + } + return(list("data"=data, "shifts"=shifts, "flips"=flips, "classes"=classes, "shapes"=shapes)) +} + +setwd(file.path("/", "local", "groux", "scATAC-seq")) + +seed = 20170426 + +dir_data = file.path("data", "simulated_chipseq_data") +dir.create(dir_data, showWarnings=F) +if(!file.exists(dir_data)) +{ dir.create(dir_data) } + + +# general parameter +n_samples = 1000 +n_col = 2001 # the length of a signal vector +shift_max = 100 # the maximum possible shift +p_flip = 0.3 # the prob of having a flipped signal + +# class 1 : a simple gaussian +class1_n = 600 +class1_m = ceiling(n_col/2) - ceiling(shift_max/2) # class 1 mean, mean will be in average in the middle of the data vector +class1_s = 40 # class 1 sd +# the signal shape +shape1 = dnorm(1:(n_col-shift_max+1), class1_m, class1_s) + +# class 2 : half a gaussian +class2_n = n_samples - class1_n +class2_m = floor(n_col/2) - floor(shift_max/2) # class 2 mean, mean will be in average in the middle of the data vector +class2_s = 40 # class 2 sd +# the signal shape +shape2 = dnorm(1:(n_col-shift_max+1), class2_m, class2_s) +shape2[class2_m:length(shape2)] = min(shape2) + +# class 3 : a uniform +class3_n = 333 +class3_from = floor(n_col/2) - floor(shift_max/2) -120 # class 3 from, mean will be in average in the middle of the data vector +class3_to = floor(n_col/2) - floor(shift_max/2) +120 # class 3 to, mean will be in average in the middle of the data vector +# the signal shape +shape3 = dunif(1:(n_col-shift_max+1), class3_from, class3_to) + +# normalize +shape1 = shape1 / sum(shape1) +shape2 = shape2 / sum(shape2) +shape2 = shape2 / sum(shape2) + +# sequencing coverage +# the mean number of read per sample +coverages = c(1, 10, 100) + +# noise proportion +# in the end, the noise is added EVERYWHERE (also on the signal core) +# _ +# /\ | +# | - | proportion of signal +# | - - | +# |_____- -__________ _ +# | | proportion of noise +# ----------------------> _ +noises = c(0) + + + +for(p_noise in noises) # the proportion of reads which are noise +{ for(coverage in coverages) + { + # ----------------------------------------------------- data with 3 classes ----------------------------------------------------- + set.seed(seed) + data = generate_data_chipseq(n_col, shift_max, p_flip, p_noise, coverage, list(shape1, shape2, shape3), list(class3_n, class3_n, class3_n+1)) + # save + write.table(data$data, file=file.path(dir_data, sprintf("simulated_data_3_class_asym_cov%d_noise%.1f.txt", coverage, p_noise)), row.names=F, col.names=F, quote=F) + write.table(data$shifts, file=file.path(dir_data, sprintf("simulated_data_3_class_asym_shifts_cov%d_noise%.1f.txt", coverage, p_noise)), row.names=F, col.names=F, quote=F) + write.table(data$flips, file=file.path(dir_data, sprintf("simulated_data_3_class_asym_flips_cov%d_noise%.1f.txt", coverage, p_noise)), row.names=F, col.names=F, quote=F) + write.table(data$classes, file=file.path(dir_data, sprintf("simulated_data_3_class_asym_classes_cov%d_noise%.1f.txt", coverage, p_noise)), row.names=F, col.names=F, quote=F) + write.table(data$shapes, file=file.path(dir_data, sprintf("simulated_data_3_class_asym_shapes_cov%d_noise%.1f.txt", coverage, p_noise)), row.names=F, col.names=F, quote=F) + # clean + data = shifts = flips = classes = shapes = NULL + } +} + diff --git a/src/Applications/ApplicationInterface.cpp b/src/Applications/ApplicationInterface.cpp new file mode 100644 index 0000000..8290fea --- /dev/null +++ b/src/Applications/ApplicationInterface.cpp @@ -0,0 +1,5 @@ + +#include + +ApplicationInterface::~ApplicationInterface() +{} diff --git a/src/Applications/ApplicationInterface.hpp b/src/Applications/ApplicationInterface.hpp new file mode 100644 index 0000000..7e000f8 --- /dev/null +++ b/src/Applications/ApplicationInterface.hpp @@ -0,0 +1,20 @@ + +/*! + * \brief The ApplicationInterface class provides an interface for + * program wrapper which only require to have one object constructed + * and its run() method called. + */ +class ApplicationInterface +{ + public: + /*! + * \brief Destructor. + */ + virtual ~ApplicationInterface() ; + /*! + * \brief Runs the application, with all its + * functionalities. + * \return the exit code. + */ + virtual int run() = 0 ; +} ; diff --git a/src/Applications/ChIPPartitioningApplication.cpp b/src/Applications/ChIPPartitioningApplication.cpp new file mode 100644 index 0000000..e31149f --- /dev/null +++ b/src/Applications/ChIPPartitioningApplication.cpp @@ -0,0 +1,153 @@ + +#include +#include +#include + +#include +#include +#include +#include // std::invalid_argument + + +namespace po = boost::program_options ; + + +ChIPPartitioningApplication::ChIPPartitioningApplication(int argn, char** argv) + : file_data(""), n_class(0), n_iter(0), n_shift(0), flip(false), + n_threads(0), seeding(EMEngine::seeding_codes::RANDOM), + seed(""), runnable(true) +{ + // parse command line options and set the fields + this->parseOptions(argn, argv) ; +} + +int ChIPPartitioningApplication::run() +{ if(this->runnable) + { Matrix2D data(this->file_data) ; + EMEngine em(data, + this->n_class, + this->n_iter, + this->n_shift, + this->flip, + this->seeding, + this->seed, + this->n_threads) ; + int code = em.cluster() ; + std::cout << em.get_posterior_prob() << std::endl ; + return EXIT_SUCCESS ; + } + else + { return EXIT_FAILURE ; } +} + +void ChIPPartitioningApplication::parseOptions(int argn, char** argv) +{ + // no option to parse + if(argv == nullptr) + { std::string message = "no options to parse!" ; + throw std::invalid_argument(message) ; + } + + // help messages + std::string desc_msg = "\n" + "ChIPPartitioning is a probabilistic partitioning algorithm that \n" + "sofetly assigns genomic regions to classes given their shape \n" + "of the signal over the region. The assignment probabilities \n" + "are returned through stdout.\n\n" ; + std::string opt_help_msg = "Produces this help message." ; + std::string opt_parallel_msg = "The number of threads dedicated to the " + "computations, by default 1." ; + std::string opt_data_msg = "The path to the file containing the data"; + std::string opt_iter_msg = "The number of iterations." ; + std::string opt_class_msg = "The number of classes to find." ; + std::string opt_shift_msg = "Enables this number of column of shifting " + "freedom. By default, shifting is " + "disabled (equivalent to --shift 1)." ; + std::string opt_flip_msg = "Enables flipping."; + std::string opt_seeding_msg = "Specify which method should be used to initialise the " + "cluster references." ; + std::string opt_seed_msg = "A value to seed the random number generator."; + + // option parser + boost::program_options::variables_map vm ; + boost::program_options::options_description desc(desc_msg) ; + + std::string seeding_tmp ; + + desc.add_options() + ("help,h", opt_help_msg.c_str()) + + ("data,d", po::value(&(this->file_data)), opt_data_msg.c_str()) + + ("iter,i", po::value(&(this->n_iter)), opt_iter_msg.c_str()) + ("class,c", po::value(&(this->n_class)), opt_class_msg.c_str()) + ("shift,s", po::value(&(this->n_shift)), opt_shift_msg.c_str()) + ("flip", opt_flip_msg.c_str()) + + ("seeding", po::value(&(seeding_tmp)), opt_seeding_msg.c_str()) + ("seed", po::value(&(this->seed)), opt_seed_msg.c_str()) + ("parallel,p", po::value(&(this->n_threads)), opt_parallel_msg.c_str()) ; + + // parse + try + { po::store(po::parse_command_line(argn, argv, desc), vm) ; + po::notify(vm) ; + } + catch(std::invalid_argument& e) + { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; + throw std::invalid_argument(msg) ; + } + catch(...) + { throw std::invalid_argument("An unknown error occured while parsing the options") ; } + + bool help = vm.count("help") ; + + // checks unproper option settings + if(this->file_data == "" and (not help)) + { std::string msg("Error! No data file was given (--data)!") ; + throw std::invalid_argument(msg) ; + } + else if((seeding_tmp != "random") and + (seeding_tmp != "sampling") and + (seeding_tmp != "toy") and + (not help)) + { std::string msg("Error! Unrecognized seeding method (--seeding)!") ; + throw std::invalid_argument(msg) ; + } + + // no shift given, value of 1 -> no shift + if(this->n_shift == 0) + { this->n_shift = 1 ; } + // set seeding + if(seeding_tmp == "random") + { this->seeding = EMEngine::seeding_codes::RANDOM ; } + else if(seeding_tmp == "sampling") + { this->seeding = EMEngine::seeding_codes::SAMPLING ; } + else if(seeding_tmp == "toy") + { this->seeding = EMEngine::seeding_codes::TOY ; } + // set flip + if(vm.count("flip")) + { this->flip = true ; } + // threads + if(this->n_threads == 0) + { this->n_threads = 1 ; } + + // help invoked, run() cannot be invoked + if(help) + { std::cout << desc << std::endl ; + this->runnable = false ; + return ; + } + // everything fine, run() can be called + else + { this->runnable = true ; + return ; + } +} + + +int main(int argn, char** argv) +{ ChIPPartitioningApplication app(argn, argv) ; + return app.run() ; +} + diff --git a/src/Applications/ChIPPartitioningApplication.hpp b/src/Applications/ChIPPartitioningApplication.hpp new file mode 100644 index 0000000..c2611bc --- /dev/null +++ b/src/Applications/ChIPPartitioningApplication.hpp @@ -0,0 +1,99 @@ +#ifndef CHIPPPARTITIONINGAPPLICATION_HPP +#define CHIPPPARTITIONINGAPPLICATION_HPP + +#include +#include + +#include + +/*! + * \brief The ChIPPartitioningApplication class is a wrapper around an EMEngine + * instance creating an autonomous application to classify data by directly + * passing all the options and parameters from the command line. + */ +class ChIPPartitioningApplication: public ApplicationInterface +{ + public: + ChIPPartitioningApplication() = delete ; + ChIPPartitioningApplication(const ChIPPartitioningApplication& app) = delete ; + /*! + * \brief Constructs an object from the command line + * options. + * \param argn the number of options passed to the + * main() function. + * \param argv the vector of options passed to the + * main() function. + */ + ChIPPartitioningApplication(int argn, char** argv) ; + + /*! + * \brief Runs the application. The data are classified + * using the given settings and the posterior probability + * matrix is returned through the stdout. + * The matrix is a 4D matrix with dimensions : + * regions, class, shift flip. + * \return an exit code EXIT_SUCCESS or EXIT_FAILURE + * to return to the OS. + */ + virtual int run() override ; + + private: + /*! + * \brief Parses the program command line options and + * sets the object field accordingly. + * If the help option is detected, the "runnable" + * field is set to false and subsequent calls to + * run() will produce nothing. + * \param argn the number of options passed to the + * main() function. + * \param argv the vector of options passed to the + * main() function. + * \throw std::invalid_argument if an error is found + * in the program options. + */ + void parseOptions(int argn, char** argv) ; + + /*! + * \brief the path to the file containing the data. + */ + std::string file_data ; + /*! + * \brief the number of classes to partition the data into. + */ + size_t n_class ; + /*! + * \brief the number of iterations allowed. + */ + size_t n_iter ; + /*! + * \brief the shifting freedom. + */ + size_t n_shift ; + /*! + * \brief whether flipping freedom is allowed. + */ + bool flip ; + + /*! + * \brief the number of threads. + */ + size_t n_threads ; + + /*! + * \brief the seeding method to use. + */ + EMEngine::seeding_codes seeding ; + /*! + * \brief a seed to initialise the random number generator. + */ + std::string seed ; + + /*! + * \brief a flag indicating whether the core of run() can be + * run or not. + */ + bool runnable ; +} ; + + +#endif // CHIPPPARTITIONINGAPPLICATION_HPP diff --git a/src/Applications/ProbToRefApplication.cpp b/src/Applications/ProbToRefApplication.cpp new file mode 100644 index 0000000..9a2d308 --- /dev/null +++ b/src/Applications/ProbToRefApplication.cpp @@ -0,0 +1,113 @@ + +#include +#include +#include +#include + +#include +#include +#include + + +namespace po = boost::program_options ; + + +ProbToRefApplication::ProbToRefApplication(int argn, char** argv) + : file_data(""), file_prob(""), + n_threads(0), runnable(false) +{ this->parseOptions(argn, argv) ; } + +ProbToRefApplication::~ProbToRefApplication() +{} + +int ProbToRefApplication::run() +{ if(this->runnable) + { + Matrix2D data(this->file_data) ; + Matrix4D prob(this->file_prob) ; + ReferenceComputer refcomp(data, + prob, + this->n_threads) ; + std::cout << refcomp.get_references() << std::endl ; + return 0 ; + } + else + { return 1 ; } +} + +void ProbToRefApplication::parseOptions(int argn, char** argv) +{ // no option to parse + if(argv == nullptr) + { std::string message = "no options to parse!" ; + throw std::invalid_argument(message) ; + } + + // help messages + std::string desc_msg = "\n" + "ProbToRef reconstructs the class aggregation \n" + "patterns from the assignment probabilities computed \n" + "by ChIPPartitioning and the data partitioned.\n\n" ; + std::string opt_help_msg = "Produces this help message." ; + std::string opt_parallel_msg = "The number of threads dedicated to the \n" + "computations, by default 1." ; + std::string opt_data_msg = "The path to the file containing the data."; + std::string opt_prob_msg = "The path to the file containing the posterior probabilities." ; + + // option parser + boost::program_options::variables_map vm ; + boost::program_options::options_description desc(desc_msg) ; + + std::string seeding_tmp ; + + desc.add_options() + ("help,h", opt_help_msg.c_str()) + + ("data,d", po::value(&(this->file_data)), opt_data_msg.c_str()) + ("prob,p", po::value(&(this->file_prob)), opt_prob_msg.c_str()) + + ("parallel", po::value(&(this->n_threads)), opt_parallel_msg.c_str()) ; + + // parse + try + { po::store(po::parse_command_line(argn, argv, desc), vm) ; + po::notify(vm) ; + } + catch(std::invalid_argument& e) + { std::string msg = std::string("Error! Invalid option given!\n") + std::string(e.what()) ; + throw std::invalid_argument(msg) ; + } + catch(...) + { throw std::invalid_argument("An unknown error occured while parsing the options") ; } + + bool help = vm.count("help") ; + + // checks unproper option settings + if(this->file_data == "" and (not help)) + { std::string msg("Error! No data file was given (--data)!") ; + throw std::invalid_argument(msg) ; + } + else if(this->file_prob == "" and (not help)) + { std::string msg("Error! No posterior probabily file was given (--prob)!") ; + throw std::invalid_argument(msg) ; + } + // threads + if(this->n_threads == 0) + { this->n_threads = 1 ; } + + // help invoked, run() cannot be invoked + if(help) + { std::cout << desc << std::endl ; + this->runnable = false ; + return ; + } + // everything fine, run() can be called + else + { this->runnable = true ; + return ; + } +} + +int main(int argn, char** argv) +{ ProbToRefApplication app(argn, argv) ; + return app.run() ; +} diff --git a/src/Applications/ProbToRefApplication.hpp b/src/Applications/ProbToRefApplication.hpp new file mode 100644 index 0000000..5a2855a --- /dev/null +++ b/src/Applications/ProbToRefApplication.hpp @@ -0,0 +1,64 @@ + + + +#include +#include +#include + + +#include +#include + +/*! + * \brief The ProbToRefApplication class is a wrapper around an ReferenceComputer + * instance creating an autonomous application to compute the class references + * given the posterior probability matrix and the data by directly passing them + * from the command line. + */ +class ProbToRefApplication : public ApplicationInterface +{ + public: + ProbToRefApplication() = delete ; + ProbToRefApplication(const ProbToRefApplication& app) = delete ; + /*! + * \brief Constructs an object from the command line + * options. + * \param argn the number of options passed to the + * main() function. + * \param argv the vector of options passed to the + * main() function. + */ + ProbToRefApplication(int argn, char** argv) ; + /*! + * \brief Destructor. + */ + virtual ~ProbToRefApplication() override ; + /*! + * \brief Runs the application. The references + * are computed and displayed through the + * stdout. + * \return the exit code. + */ + virtual int run() override ; + + private: + /*! + * \brief Parses the program command line options and + * sets the object field accordingly. + * If the help option is detected, the "runnable" + * field is set to false and subsequent calls to + * run() will produce nothing. + * \param argn the number of options passed to the + * main() function. + * \param argv the vector of options passed to the + * main() function. + * \throw std::invalid_argument if an error is found + * in the program options. + */ + void parseOptions(int argn, char** argv) ; + + std::string file_data ; + std::string file_prob ; + size_t n_threads ; + bool runnable ; +} ; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..ea6464f --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,52 @@ + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") + + +# include file location +include_directories("${scATACseq_SOURCE_DIR}/src/Matrix") +include_directories("${scATACseq_SOURCE_DIR}/src/Clustering") +include_directories("${scATACseq_SOURCE_DIR}/src/Random") +include_directories("${scATACseq_SOURCE_DIR}/src/Parallel") +include_directories("${scATACseq_SOURCE_DIR}/src/Statistics") +include_directories("${scATACseq_SOURCE_DIR}/src/Utility") +include_directories("${scATACseq_SOURCE_DIR}/src/GUI") +include_directories("${scATACseq_SOURCE_DIR}/src/Applications") +include_directories("${scATACseq_SOURCE_DIR}/src/Matrix") + +# compile modules into static libraries +add_library(Clustering "Clustering/ClusteringEngine.cpp" "Clustering/EMEngine.cpp" "Clustering/ReferenceComputer.cpp") +add_library(Random "Random/Random.cpp" "Random/RandomNumberGenerator.cpp") +add_library(Parallel "Parallel/ThreadPool.cpp") +add_library(Statistics "Statistics/Statistics.cpp") +add_library(GUI "GUI/ConsoleProgressBar.cpp" "GUI/Diplayable.cpp" "GUI/Updatable.cpp") +link_directories("${scATACseq_SOURCE_DIR}/src/Clustering") +link_directories("${scATACseq_SOURCE_DIR}/src/Random") +link_directories("${scATACseq_SOURCE_DIR}/src/Statistics") +link_directories("${scATACseq_SOURCE_DIR}/src/GUI") +link_directories("${scATACseq_SOURCE_DIR}/src/Parallel") + +# linking modules to resolve dependencies +target_link_libraries(Clustering Random Statistics GUI Parallel) +target_link_libraries(Parallel Threads::Threads) + +# executables +## a toy +set(EXE_MAIN "scATACseq") +add_executable(${EXE_MAIN} "main.cpp") +target_link_libraries(${EXE_MAIN} Clustering Threads::Threads) +set_target_properties(${EXE_MAIN} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") +## an ChIPPartitioning standalone +set(EXE_CHIPPART "ChIPPartitioning") +add_executable(${EXE_CHIPPART} "Applications/ChIPPartitioningApplication.cpp" "Applications/ApplicationInterface.cpp") +target_link_libraries(${EXE_CHIPPART} Clustering Threads::Threads Boost::program_options) +set_target_properties(${EXE_CHIPPART} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") +## an executable to compute classes references from the data and the post prob of ChIPPartitioning +set(EXE_PROB2REF "probToRef") +add_executable(${EXE_PROB2REF} "Applications/ProbToRefApplication.cpp" "Applications/ApplicationInterface.cpp") +target_link_libraries(${EXE_PROB2REF} Clustering Threads::Threads Boost::program_options) +set_target_properties(${EXE_PROB2REF} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") +## a test suite +set(EXE_TESTS "unittests") +add_executable(${EXE_TESTS} "unittests.cpp" "Unittests/unittests_matrix.cpp") +target_link_libraries(${EXE_TESTS} ${UNITTEST_LIB}) +set_target_properties(${EXE_TESTS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${scATACseq_SOURCE_DIR}/bin") diff --git a/src/Clustering/ClusteringEngine.cpp b/src/Clustering/ClusteringEngine.cpp new file mode 100644 index 0000000..fe69e87 --- /dev/null +++ b/src/Clustering/ClusteringEngine.cpp @@ -0,0 +1,6 @@ +#include +#include + +ClusteringEngine::~ClusteringEngine() +{} + diff --git a/src/Clustering/ClusteringEngine.hpp b/src/Clustering/ClusteringEngine.hpp new file mode 100644 index 0000000..ecfc47b --- /dev/null +++ b/src/Clustering/ClusteringEngine.hpp @@ -0,0 +1,36 @@ +#ifndef CLUSTERINGENGINE_HPP +#define CLUSTERINGENGINE_HPP + +#include + +/*! + * \brief The ClusteringEngine class is an abstract class providing an interface + * to other classes implementing data clustering methods. + */ +class ClusteringEngine +{ + public: + /*! + * \brief The possible exit codes for the cluster method. + * 0 the clustering procedure converged, 1 the clustering + * procedure succeeded without converging, 2 the clustering + * failed. + */ + enum exit_codes {CONVERGENCE=0, SUCCESS, FAILURE, NCODE=3} ; + + public: + /*! + * \brief Destructor. + */ + virtual ~ClusteringEngine() ; + + /*! + * \brief Runs the clustering. + * \return an exit code indicating whether how the clustering + * ended. + */ + virtual exit_codes cluster() = 0 ; + +} ; + +#endif // CLUSTERINGENGINE_HPP diff --git a/src/Clustering/EMEngine.cpp b/src/Clustering/EMEngine.cpp new file mode 100644 index 0000000..b1c0eb3 --- /dev/null +++ b/src/Clustering/EMEngine.cpp @@ -0,0 +1,698 @@ +#include +#include +#include +#include +#include +#include // rand_int_uniform() +#include // getRandomNumberGenerator() +#include // poisson_pmf(), normal_pmf(), sd() +#include // >> for vectors +#include // ConsoleProgressBar +#include // ThreadPool +#include // log(), exp(), pow() +#include +#include // numeric_limits +#include // uniform_real, variate_generator +#include // future, promise +#include // move() +#include // bind(), ref() + + + +EMEngine::EMEngine(const Matrix2D& data, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + EMEngine::seeding_codes seeding, + const std::string& seed, + size_t n_threads) + : flip(flip), n_iter(n_iter), n_shift(n_shift), n_flip(flip+1), n_class(n_class), + n_row(data.get_nrow()), n_col(data.get_ncol()), l_slice(n_col - n_shift + 1), + seeding_method(seeding), n_threads(n_threads), threads(n_threads) +{ + // initialise random number generator + getRandomGenerator(seed) ; + + // copy the data + this->data = matrix2d_i(this->n_row, v_i(this->n_col)) ; + for(size_t i=0; in_row; i++) + { for(size_t j=0; jn_col; j++) + { this->data[i][j] = data(i,j) ; } + } + +} + +EMEngine::~EMEngine() +{ this->threads.join() ; } + +Matrix2D EMEngine::get_references() const +{ + Matrix2D references(this->n_class, this->l_slice, 0.) ; + for(size_t i=0; in_class; i++) + { for(size_t j=0; jl_slice; j++) + { references(i,j) = this->references[i][j] ; } + } + return references ; + +} + +Matrix4D EMEngine::get_posterior_prob() const +{ Matrix4D post_prob(this->n_row, this->n_class, this->n_shift, this->n_flip, 0.) ; + for(size_t i=0; in_row; i++) + { for(size_t k=0; kn_class; k++) + { for(size_t s=0; sn_shift; s++) + { for(size_t f=0; fn_flip; f++) + { post_prob(i,k,s,f) = this->post_prob[i][k][s][f] ; } + } + } + } + return post_prob ; +} + +double EMEngine::get_loglikelihood() const +{ + double ll = 0 ; + + for(size_t i=0; in_row; i++) + { for(size_t j=0; jn_class; j++) + { for(size_t s=0; sn_shift; s++) + { // slice is [from_fw,to) + // from_dat_fw to_dat_fw [from_dat_fw, to_dat_fw] + // fw |---------->>>----------| + // ----------------------------------> data + // rev |----------<<<----------| [from_dat_rev, to_dat_rev] + // to_dat_rev can be -1 -> int + // to_dat_rev from_dat_rev + + // log likelihood + // --------------- forward --------------- + int from_dat_fw = s ; + int to_dat_fw = from_dat_fw + this->l_slice - 1 ; + for(int j_dat_fw=from_dat_fw, j_ref_fw=0; + j_dat_fw<=to_dat_fw; j_dat_fw++, j_ref_fw++) + { + double ll_fw = log(poisson_pmf(this->data[i][j_dat_fw], + this->references[n_class][j_ref_fw]*this->window_mean[i][s]) * + this->class_prob[j][s][flip_states::FORWARD]) ; + ll += std::max(ll_fw, EMEngine::p_min_log) ; + } + + + // --------------- reverse --------------- + if(this->flip) + { int from_dat_rev = this->n_col - 1 - s ; + int to_dat_rev = from_dat_rev - (this->l_slice - 1) ; + int shift_rev = this->n_shift - s - 1 ; + for(int j_dat_rev=from_dat_rev, j_ref_fw=0; + j_dat_rev >= to_dat_rev; j_dat_rev--, j_ref_fw++) + { double ll_rev = log(poisson_pmf(this->data[i][j_dat_rev], + this->references[n_class][j_ref_fw]*this->window_mean[i][shift_rev]) * + this->class_prob[j][s][flip_states::REVERSE]) ; + ll += std::max(ll_rev, EMEngine::p_min_log) ; + } + } + } + } + } + return ll ; +} + +ClusteringEngine::exit_codes EMEngine::cluster() +{ size_t bar_update_n = this->n_iter + 1 ; + ConsoleProgressBar bar(std::cerr, bar_update_n, 70, "clustering") ; + + // construct all other required data structures + // mean number of reads per window + this->window_mean = matrix2d_d(this->n_row, v_d(this->n_shift, 0.)) ; + this->compute_window_means() ; + + // the references + this->references = matrix2d_d(this->n_class, + v_d(this->l_slice, 0.)) ; + // log loglikelihood + this->loglikelihood = matrix4d_d(this->n_row, + matrix3d_d(this->n_class, + matrix2d_d(this->n_shift, + v_d(this->n_flip, 0.)))) ; + this->loglikelihood_max = v_d(this->n_row, 0.) ; + + // posterior prob + this->post_prob = matrix4d_d(this->n_row, + matrix3d_d(this->n_class, + matrix2d_d(this->n_shift, + v_d(this->n_flip, 0.)))) ; + this->class_prob = matrix3d_d(this->n_class, + matrix2d_d(this->n_shift, + v_d(this->n_flip, 0.))) ; + this->class_prob_tot = v_d(this->n_class, 0.) ; + this->post_prob_row = v_d(this->n_row, 0.) ; + this->post_prob_class = v_d(this->n_class, 0.) ; + this->post_prob_tot = 0. ; + + // seeding + this->seeding(this->seeding_method) ; + bar.update() ; + + // optimize the partition + for(size_t n_iter=0; n_itern_iter; n_iter++) + { + // normalize the references such thjat the mean value, on each + // row, is 1 + this->normalize_references() ; + // E-step + this->compute_loglikelihood() ; + this->compute_post_prob() ; + // M-step + this->compute_class_prob() ; + this->compute_references() ; + this->center_shifts() ; + bar.update() ; + } + bar.update() ; std::cerr << std::endl ; + return ClusteringEngine::exit_codes::SUCCESS ; +} + +EMEngine::EMEngine() +{} + +void EMEngine::normalize_references() +{ + for(size_t i=0; in_class; i++) + { double mean = 0. ; + for(size_t j=0; jl_slice; j++) + { mean += this->references[i][j] ; } + mean /= this->l_slice ; + for(size_t j=0; jl_slice; j++) + { this->references[i][j] /= mean ; } + } +} + +void EMEngine::seeding(EMEngine::seeding_codes seeding) +{ + if(seeding == EMEngine::seeding_codes::RANDOM) + { this->seeding_random() ; } + else if(seeding == EMEngine::seeding_codes::SAMPLING) + { this->seeding_sampling() ; } + else if(seeding == EMEngine::seeding_codes::TOY) + { this->seeding_toy() ; } +} + +void EMEngine::seeding_random() +{ + // get random values from a beta distribution cannot be done using boost so + // i) generate random number [0,1] x + // ii) compute f(x) where f is beta distribution + + matrix2d_d prob(this->n_row, v_d(this->n_class, 0.)) ; + v_d prob_class(this->n_class, 0.) ; + double tot_sum = 0. ; + + // sample the prob + // beta distribution parameters + double alpha = pow(this->n_row, -0.5) ; + double beta = 1. ; + for(size_t i=0; in_row; i++) + { double row_sum = 0. ; + for(size_t j=0; jn_class; j++) + { double x = rand_real_uniform(0., 1.0) ; + double p = std::max(EMEngine::p_min, beta_pmf(x, alpha, beta)) ; + prob[i][j] = p ; + prob_class[j] += p ; + tot_sum += p ; + row_sum += p ; + } + // normalize + for(size_t j=0; jn_class; j++) + { prob[i][j] /= row_sum ; } + } + + // class prob + for(auto& p : prob_class) + { p /= tot_sum ; } + + // compute the refererences + for(size_t i=0; in_row; i++) + { for(size_t j=0; jn_class; j++) + { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_slice; j_ref++, j_dat++) + { this->references[j][j_ref] += (this->data[i][j_dat] * prob[i][j]) ; } + } + } + // normalize + for(size_t i=0; in_class; i++) + { for(size_t j=0; jl_slice; j++) + { this->references[i][j] ; } + } + + // set the class probabilities to a uniform distribution + double sum = this->n_class * this->n_shift * this->n_flip ; + for(size_t i=0; in_class; i++) + { for(size_t j=0; jn_shift; j++) + { for(size_t k=0; kn_flip; k++) + { this->class_prob[i][j][k] = 1./sum ; } + } + } +} + +void EMEngine::seeding_sampling() +{ + // sample data to initialise the references + std::vector choosen(this->n_row, false) ; + + for(size_t i=0; in_class; ) + { size_t index = rand_int_uniform(size_t(0), size_t(this->n_row-1)) ; + // already choose + if(choosen[index]) + { ; } + // not yet choosen as reference + else + { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_slice; j_ref++, j_dat++) + { this->references[i][j_ref] = this->data[index][j_dat] ; } + choosen[index] = true ; + i++ ; + } + } + + // set the class probabilities to a uniform distribution + double sum = this->n_class * this->n_shift * this->n_flip ; + for(size_t i=0; in_class; i++) + { for(size_t j=0; jn_shift; j++) + { for(size_t k=0; kn_flip; k++) + { this->class_prob[i][j][k] = 1. / sum ; + } + } + } +} + +void EMEngine::seeding_toy() +{ + // sample data to initialise the references + std::vector choosen(this->n_row, false) ; + + for(size_t i=0; in_class; ) + { size_t index = i ; + // already choose + if(choosen[index]) + { ; } + // not yet choosen as reference + else + { for(size_t j_ref=0, j_dat=this->n_shift/2; j_refl_slice; j_ref++, j_dat++) + { this->references[i][j_ref] = this->data[index][j_dat] ; } + choosen[index] = true ; + i++ ; + } + } + + // set the class probabilities to a uniform distribution + double sum = this->n_class * this->n_shift * this->n_flip ; + for(size_t i=0; in_class; i++) + { for(size_t j=0; jn_shift; j++) + { for(size_t k=0; kn_flip; k++) + { this->class_prob[i][j][k] = 1./sum ; } + } + } +} + +void EMEngine::compute_window_means() +{ // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row, this->n_threads) ; + + // get promises and futures + // the function run by the threads will simply fill the promise with + // "true" to indicate that they are done + std::vector> promises(this->n_threads) ; + std::vector> futures(this->n_threads) ; + for(size_t i=0; in_threads; i++) + { futures[i] = promises[i].get_future() ; } + + // distribute work to threads + // -------------------------- threads start -------------------------- + for(size_t i=0; in_threads; i++) + { auto slice = slices[i] ; + this->threads.addJob(std::move( + std::bind(&EMEngine::compute_window_means_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // wait until all threads are done working + for(auto& future : futures) + { future.get() ; } + // -------------------------- threads stop --------------------------- +} + +void EMEngine::compute_window_means_routine(size_t from, + size_t to, + std::promise& done) +{ + double l_slice = double(this->l_slice) ; + for(size_t i=from; in_shift; from++) + { double sum = 0. ; + // slice is [from,to) + size_t to = from + this->l_slice ; + for(size_t j=from; jdata[i][j] ;} + this->window_mean[i][from] = sum / l_slice ; + } + } + done.set_value(true) ; +} + +void EMEngine::compute_loglikelihood() +{ + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row, this->n_threads) ; + + // get promises and futures + // the function run by the threads will simply fill the promise with + // "true" to indicate that they are done + std::vector> promises(this->n_threads) ; + std::vector> futures(this->n_threads) ; + for(size_t i=0; in_threads; i++) + { futures[i] = promises[i].get_future() ; } + + // distribute work to threads + // -------------------------- threads start -------------------------- + for(size_t i=0; in_threads; i++) + { auto slice = slices[i] ; + this->threads.addJob(std::move( + std::bind(&EMEngine::compute_loglikelihood_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // wait until all threads are done working + for(auto& future : futures) + { future.get() ; } + // -------------------------- threads stop --------------------------- +} + +void EMEngine::compute_loglikelihood_routine(size_t from, size_t to, std::promise& done) +{ + // access in writing + // this->loglikelihood -> only access the i-th which belong [from,to) + // this->loglikelihood_max -> only access the i-th which belong [from,to) + + for(size_t i=from; iloglikelihood_max[i] = std::numeric_limits::lowest() ; + + for(size_t n_class=0; n_classn_class; n_class++) + { for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { // slice is [from_fw,to) + // from_dat_fw to_dat_fw [from_dat_fw, to_dat_fw] + // fw |---------->>>----------| + // ----------------------------------> data + // rev |----------<<<----------| [from_dat_rev, to_dat_rev] + // to_dat_rev can be -1 -> int + // to_dat_rev from_dat_rev + + // log likelihood + // --------------- forward --------------- + int from_dat_fw = n_shift ; + int to_dat_fw = from_dat_fw + this->l_slice - 1 ; + double ll_fw = 0. ; + for(int j_dat_fw=from_dat_fw, j_ref_fw=0; + j_dat_fw<=to_dat_fw; j_dat_fw++, j_ref_fw++) + { + double ll = log(poisson_pmf(this->data[i][j_dat_fw], + this->references[n_class][j_ref_fw]*this->window_mean[i][n_shift])) ; + ll_fw += std::max(ll, EMEngine::p_min_log) ; + } + this->loglikelihood[i][n_class][from_dat_fw][flip_states::FORWARD] = ll_fw ; + // keep track of the max per row + if(ll_fw > this->loglikelihood_max[i]) + { this->loglikelihood_max[i] = ll_fw ; } + + + // --------------- reverse --------------- + if(this->flip) + { int from_dat_rev = this->n_col - 1 - n_shift ; + int to_dat_rev = from_dat_rev - (this->l_slice - 1) ; + int shift_rev = this->n_shift - n_shift - 1 ; + double ll_rev = 0. ; + for(int j_dat_rev=from_dat_rev, j_ref_fw=0; + j_dat_rev >= to_dat_rev; j_dat_rev--, j_ref_fw++) + { double ll = log(poisson_pmf(this->data[i][j_dat_rev], + this->references[n_class][j_ref_fw]*this->window_mean[i][shift_rev])) ; + ll_rev += std::max(ll, EMEngine::p_min_log) ; + } + this->loglikelihood[i][n_class][from_dat_fw][flip_states::REVERSE] = ll_rev ; + // keep track of the max per row + if(ll_rev > this->loglikelihood_max[i]) + { this->loglikelihood_max[i] = ll_rev ; } + } + } + } + } + // fill the promise to indicate that the function exited + done.set_value(true) ; +} + +void EMEngine::compute_post_prob() +{ + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row, this->n_threads) ; + + // get promises and futures + // the function run by the threads will compute + // the partial sum per class of post_prob for the given slice + // this should be used to compute the complete sum of post_prob + // and the complete sum per class of post_prob + std::vector> promises(this->n_threads) ; + std::vector> futures(this->n_threads) ; + for(size_t i=0; in_threads; i++) + { futures[i] = promises[i].get_future() ; } + + // distribute work to threads + // -------------------------- threads start -------------------------- + for(size_t i=0; in_threads; i++) + { auto slice = slices[i] ; + this->threads.addJob(std::move( + std::bind(&EMEngine::compute_post_prob_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // wait until all threads are done working + // compute the sum of post prob and the per class sum of post prob + // from the partial results computed on each slice + this->post_prob_tot = 0. ; + this->post_prob_class = v_d(this->n_class, 0.) ; + for(auto& future : futures) + { auto probs = future.get() ; + for(size_t i=0; in_class; i++) + { double prob = probs[i] ; + this->post_prob_class[i] += prob ; + this->post_prob_tot += prob ; + } + } + // -------------------------- threads stop --------------------------- +} + +void EMEngine::compute_post_prob_routine(size_t from, + size_t to, + std::promise& done) +{ + // this->post_prob_row -> only access the i-th which belong [from,to) + // this->post_prob -> only access the i-th which belong [from,to) + + // some values that needs to be returned + // the total of the posterior prob for this slice of the data + // the total per class of posterior prob for this slice of the data + v_d post_prob_class(this->n_class, 0.) ; + + for(size_t i=from; ipost_prob_row[i] = 0. ; + + for(size_t n_class=0; n_classn_class; n_class++) + { for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t n_flip=0; n_flipn_flip; n_flip++) + { double p = exp(this->loglikelihood[i][n_class][n_shift][n_flip] - + this->loglikelihood_max[i]) * + this->class_prob[n_class][n_shift][n_flip] ; + this->post_prob[i][n_class][n_shift][n_flip] = p ; + this->post_prob_row[i] += p ; + } + } + } + // normalize + for(size_t n_class=0; n_classn_class; n_class++) + { for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t n_flip=0; n_flipn_flip; n_flip++) + { this->post_prob[i][n_class][n_shift][n_flip] /= + this->post_prob_row[i] ; + double p = this->post_prob[i][n_class][n_shift][n_flip] ; + post_prob_class[n_class] += p ; + } + } + } + } + + done.set_value(post_prob_class) ; +} + +void EMEngine::compute_class_prob() +{ + for(size_t n_class=0; n_classn_class; n_class++) + { // reset total + this->class_prob_tot[n_class] = 0. ; + for(size_t n_shift=0; n_shiftn_shift; n_shift++) + { for(size_t flip=0; flipn_flip; flip++) + { // sum + this->class_prob[n_class][n_shift][flip] = 0. ; + for(size_t i=0; in_row; i++) + { this->class_prob[n_class][n_shift][flip] += + this->post_prob[i][n_class][n_shift][flip] ; + } + // normalize + this->class_prob[n_class][n_shift][flip] /= this->post_prob_tot ; + this->class_prob_tot[n_class] += this->class_prob[n_class][n_shift][flip] ; + } + } + } +} + +void EMEngine::compute_references() +{ + // compute the slices on which each thread will work + std::vector> slices = + ThreadPool::split_range(0, this->n_row, this->n_threads) ; + + // get promises and futures + // the function run by the threads will compute + // the reference from the given slice + std::vector> promises(this->n_threads) ; + std::vector> futures(this->n_threads) ; + for(size_t i=0; in_threads; i++) + { futures[i] = promises[i].get_future() ; } + + // distribute work to threads + // -------------------------- threads start -------------------------- + for(size_t i=0; in_threads; i++) + { auto& slice = slices[i] ; + this->threads.addJob(std::move( + std::bind(&EMEngine::compute_references_routine, + this, + slice.first, + slice.second, + std::ref(promises[i])))) ; + } + // while threads are working, reset the references + for(size_t i=0; in_class; i++) + { for(size_t j=0; jl_slice; j++) + { this->references[i][j] = 0. ; } + } + // wait until all threads are done working + // sum the partial class references to get the complete ones + for(size_t n=0; nn_threads; n++) + { matrix2d_d reference = futures[n].get() ; + for(size_t i=0; in_class; i++) + { for(size_t j=0; jl_slice; j++) + { this->references[i][j] += reference[i][j] ; } + } + } + // -------------------------- threads stop --------------------------- +} + +void EMEngine::compute_references_routine(size_t from, size_t to, std::promise& references) +{ // the empty references + matrix2d_d ref(this->n_class, v_d(this->l_slice, 0.)) ; + + for(size_t n_class=0; n_class < this->n_class; n_class++) + { + for(size_t i=from; in_shift; n_shift++) + { // --------------- forward --------------- + int from_dat_fw = n_shift ; + int to_dat_fw = from_dat_fw + this->l_slice - 1 ; + for(int j_dat_fw=from_dat_fw, j_ref_fw=0; + j_dat_fw<=to_dat_fw; j_dat_fw++, j_ref_fw++) + { ref[n_class][j_ref_fw] += + (this->post_prob[i][n_class][n_shift][flip_states::FORWARD] * this->data[i][j_dat_fw]) / + this->post_prob_class[n_class] ; + } + // --------------- reverse --------------- + if(this->flip) + { int from_dat_rev = this->n_col - 1 - n_shift ; + int to_dat_rev = from_dat_rev - (this->l_slice - 1) ; + for(int j_dat_rev=from_dat_rev, j_ref_fw=0; + j_dat_rev >= to_dat_rev; j_dat_rev--, j_ref_fw++) + { ref[n_class][j_ref_fw] += + (this->post_prob[i][n_class][n_shift][flip_states::REVERSE] * this->data[i][j_dat_rev]) / + this->post_prob_class[n_class] ; + } + } + } + } + } + references.set_value(ref) ; +} + +void EMEngine::center_shifts() +{ + if(this->n_shift == 1) + { return ; } + + // the possible shift states + std::vector shifts(this->n_shift) ; + std::iota(shifts.begin(), shifts.end(), 1.) ; + + // the shift probabilities and the class probabilies (no need to norm., class_prob sums to 1) + double shifts_prob_measured_tot = 0. ; + std::vector shifts_prob_measured(this->n_shift) ; + for(size_t s=0; sn_shift; s++) + { for(size_t k=0; kn_class; k++) + { for(size_t f=0; fn_flip; f++) + { shifts_prob_measured[s] += this->class_prob[k][s][f] ; + shifts_prob_measured_tot += this->class_prob[k][s][f] ; + } + } + } + + + // the shift mean and (biased) standard deviation + double shifts_sd = sd(shifts, shifts_prob_measured, false) ; + + // the shift probabilities under the assumption that is distributed as a gaussian centered on + // the central shift state with sd and mean as in the data + // sd as the data + std::vector shifts_prob_centered(shifts.size(), 0.) ; + double shifts_prob_centered_tot = 0. ; + for(size_t i=0; in_shift/2)+1, shifts_sd) ; + shifts_prob_centered_tot += shifts_prob_centered[i] ; + } + + for(size_t k=0; kn_class; k++) + { for(size_t f=0; fn_flip; f++) + { for(size_t s=0; sn_shift; s++) + { this->class_prob[k][s][f] = this->class_prob_tot[k] * shifts_prob_centered[s] / + (this->n_flip * shifts_prob_centered_tot) ; + } + } + } + + // shifts_prob_measured_tot = 0. ; + shifts_prob_measured.clear() ; + shifts_prob_measured.resize(this->n_shift) ; + for(size_t s=0; sn_shift; s++) + { for(size_t k=0; kn_class; k++) + { for(size_t f=0; fn_flip; f++) + { shifts_prob_measured[s] += this->class_prob[k][s][f] ; + } + } + } +} + +const double EMEngine::p_min = 1e-100 ; +const double EMEngine::p_min_log = log(EMEngine::p_min) ; + diff --git a/src/Clustering/EMEngine.hpp b/src/Clustering/EMEngine.hpp new file mode 100644 index 0000000..74d8619 --- /dev/null +++ b/src/Clustering/EMEngine.hpp @@ -0,0 +1,348 @@ +#ifndef EMENGINE_HPP +#define EMENGINE_HPP + +#include +#include +#include +#include +#include +#include +#include // promise, future + + +// some typdef +#include + + +/*! + * \brief TODO + */ +class EMEngine : public ClusteringEngine +{ + static const double p_min ; + static const double p_min_log ; + + public: + /*! + * \brief The possible seeding strategies. + */ + enum seeding_codes {RANDOM=0, SAMPLING, TOY} ; + + /*! + * \brief The possible flip states. + */ + enum flip_states{FORWARD=0, REVERSE} ; + + public: + /*! + * \brief Constructs an object. + * \param data the data to cluster. + * \param n_class the number of classes to partition the data into. + * \param n_iter the number of iterations. + * \param n_shift the shifting freedom. 1 means no shift. + * \param flip whether flipping is allowed. + * \param n_threads the number of threads dedicated to the + * computations. + */ + EMEngine(const Matrix2D& data, + size_t n_class, + size_t n_iter, + size_t n_shift, + bool flip, + seeding_codes seeding, + const std::string& seed=std::string(""), + size_t n_threads=1) ; + + /*! + * \brief Destructor. + */ + virtual ~EMEngine() override ; + + /*! + * \brief Returns a matrix with the class references, on + * each row. + * \return a matrix containing the class references, on + * each row. + */ + virtual Matrix2D get_references() const ; + + /*! + * \brief Returns a matrix with the posterior probabilies + * with the dimensions representing the data, classes, shifts + * and flips respectively. + * \return a matrix containing the posterior probabilities. + */ + virtual Matrix4D get_posterior_prob() const ; + + /*! + * \brief Returns the likelihood of the partition. + * \return the likelihood of the partition. + */ + + virtual double get_loglikelihood() const ; + + /*! + * \brief Runs the data clustering. + * \return + */ + virtual ClusteringEngine::exit_codes cluster() override ; + + + protected: + + EMEngine() ; + + virtual void normalize_references() ; + + /*! + * \brief Initialises the references using the corresponding method. + * \param seeding the method to use. + */ + virtual void seeding(seeding_codes seeding) ; + + /*! + * \brief Initialises the references randomly. + * Generates the initial references by randomly assigning + * the data to the classes using a beta distribution and + * all classes are set equally likely. + */ + virtual void seeding_random() ; + + /*! + * \brief The routine that randomly assigns posterior probabilities + * for the range [from,to) of rows in the data. + * This function is used to distribute the random seeding + * over several threads. + * This function is thread safe only as long as different [from,to) + * slices are given to the different threads. + * \param from the index of the first row to treat. + * \param to the index of the past last row to treat. + * \param probs a promise containing a vector with the sum of + * the posterior probability, for each class, computed for the + * given slice. + * This allows to synchronize threads. + */ + /* + void seeding_random_routine(size_t from, + size_t to, + std::promise& probs) ; + */ + + /*! + * \brief Initialises the K references by randomly + * sampling K rows in the data. The class are set + * equally probable. + */ + virtual void seeding_sampling() ; + + /*! + * \brief Initialises the K references using the first K + * rows in data. The class are set equally probable. + */ + virtual void seeding_toy() ; + + /*! + * \brief Computes the mean number of reads present in each slice (of + * length ncol - shift + 1), in each row of the data and store them in + * this->window_mean. + */ + virtual void compute_window_means() ; + + /*! + * \brief The routine that effectively computes the mean number of reads + * present in each slice, for the range [from,to) of rows in the data. + * This function is thread safe only as long as different [from,to) + * slices are given to the different threads. + * \param from the index of the first row to treat. + * \param to the index of the past last row to treat. + * \param done a promise filled when the function is done working. + * This allows to synchronize threads. + */ + virtual void compute_window_means_routine(size_t from, + size_t to, + std::promise& done) ; + + /*! + * \brief Computes the data log likelihood given the current references. + */ + virtual void compute_loglikelihood() ; + + /*! + * \brief The routine that effectively computes the log likelihoods + * for the range [from,to) of rows in the data. + * This function is used to distribute the log likelihood computations + * over several threads. + * This function is thread safe only as long as different [from,to) + * slices are given to the different threads. + * \param from the index of the first row to treat. + * \param to the index of the past last row to treat. + * \param done a promise filled when the function is done working. + * This allows to synchronize threads. + */ + virtual void compute_loglikelihood_routine(size_t from, + size_t to, + std::promise& done) ; + + /*! + * \brief Computes the data posterior probabilties. + */ + virtual void compute_post_prob() ; + + /*! + * \brief The routine that effectively computes the posterior + * probabilities for the range [from,to) of rows in the data. + * This function is used to distribute the posterior probability + * computations over several threads. + * This function is thread safe only as long as different [from,to) + * slices are given to the different threads. + * \param from the index of the first row to treat. + * \param to the index of the past last row to treat. + * \param probs a promise containing a vector with the sum of + * the posterior probability, for each class, computed for the + * given slice. + */ + virtual void compute_post_prob_routine(size_t from, + size_t to, + std::promise& probs) ; + + /*! + * \brief Computes the class probabilities from sthe posterior + * probabilities. + */ + virtual void compute_class_prob() ; + + /*! + * \brief Computes the class aggregations given the posterior + * probabilities. + */ + virtual void compute_references() ; + + /*! + * \brief A routine that computes the partial class references + * for the range [from,to) of rows in the data. To obtain the + * full class references, it is required to 1) run this routine + * on the whole data at once or 2) run it on different slices and + * sum up the partial references obtained. + * This function is used to distribute the posterior probability + * computations over several threads. + * This function is thread safe only as long as different [from,to) + * slices are given to the different threads. + * \param from the index of the first row to treat. + * \param to the index of the past last row to treat. + * \param class_ref a promise containing a matrix with the + * partial class references on each row. + */ + virtual void compute_references_routine(size_t from, + size_t to, + std::promise& class_ref) ; + + /*! + * \brief Modifies the class probabilities in such a way that the + * shift probabilities are then normaly distributed, centered on + * the middle shift state. However, the overall class probabilities + * remain unchanged. + */ + virtual void center_shifts() ; + + protected: + /*! + * \brief whether flip is enabled. + */ + bool flip ; + /*! + * \brief the number of iterations. + */ + size_t n_iter ; + /*! + * \brief the number of shift states. + */ + size_t n_shift ; + /*! + * \brief the number of flip states. + */ + size_t n_flip ; + /*! + * \brief the number of classes. + */ + size_t n_class ; + + /*! + * \brief the data. + */ + matrix2d_i data ; + /*! + * \brief the mean number of reads per window in the data. + */ + matrix2d_d window_mean ; + /*! + * \brief the class aggregation signal. + */ + matrix2d_d references ; + /*! + * \brief the log likelihoods. + */ + matrix4d_d loglikelihood ; + /*! + * \brief the max log likelihood value for each row. + */ + std::vector loglikelihood_max ; + /*! + * \brief the posterior probabilities. + */ + matrix4d_d post_prob ; + /*! + * \brief the class probabilities. + */ + matrix3d_d class_prob ; + /*! + * \brief the total prob per class. + */ + std::vector class_prob_tot ; + + /*! + * \brief the sum per row of post_prob. + */ + std::vector post_prob_row ; + /*! + * \brief the sum per class of post_prob. + */ + std::vector post_prob_class ; + /*! + * \brief the total of post_prob. + */ + double post_prob_tot ; + + /*! + * \brief the number of rows in data. + */ + size_t n_row ; + /*! + * \brief the number of columns in data. + */ + size_t n_col ; + /*! + * \brief the size of the pattern search and of + * the scanning window in the data. + */ + size_t l_slice ; + + /*! + * \brief the seeding method to use. + */ + EMEngine::seeding_codes seeding_method ; + + /*! + * \brief the number of threads. + */ + size_t n_threads ; + /*! + * \brief the threads. + */ + ThreadPool threads ; + + + +} ; + + +#endif // EMENGINE_HPP diff --git a/src/Clustering/ReferenceComputer.cpp b/src/Clustering/ReferenceComputer.cpp new file mode 100644 index 0000000..ae3ced9 --- /dev/null +++ b/src/Clustering/ReferenceComputer.cpp @@ -0,0 +1,69 @@ +#include + +#include +#include + +// some typdef +#include + +template +std::ostream& operator << (std::ostream& stream, const std::vector& v) +{ for(const auto& x : v) + { stream << x << " " ; } + stream << std::endl ; + return stream ; +} + +ReferenceComputer::ReferenceComputer(const Matrix2D& data, + const Matrix4D& posterior_prob, + size_t n_threads) + : EMEngine(data, + posterior_prob.get_dim()[1], + 1, + posterior_prob.get_dim()[2], + posterior_prob.get_dim()[3] == 2, + EMEngine::seeding_codes::RANDOM, + "", + n_threads) +{ + + // copy the data + this->data = matrix2d_i(this->n_row, v_i(this->n_col)) ; + for(size_t i=0; in_row; i++) + { for(size_t j=0; jn_col; j++) + { this->data[i][j] = data(i,j) ; } + } + + // initialise, copy and compute probs + this->post_prob = matrix4d_d(this->n_row, + matrix3d_d(this->n_class, + matrix2d_d(this->n_shift, + v_d(this->n_flip, 0.)))) ; + this->class_prob = matrix3d_d(this->n_class, + matrix2d_d(this->n_shift, + v_d(this->n_flip, 0.))) ; + this->class_prob_tot = v_d(this->n_class, 0.) ; + this->post_prob_class = v_d(this->n_class, 0.) ; + for(size_t i=0; in_row; i++) + { for(size_t j=0; jn_class; j++) + { for(size_t s=0; sn_shift; s++) + { for(size_t f=0; fn_flip; f++) + { double p = posterior_prob(i,j,s,f) ; + this->post_prob[i][j][s][f] = p ; + this->post_prob_class[j] += p ; + } + } + } + } + this->compute_class_prob() ; + + // compute the references + this->references = matrix2d_d(this->n_class, + v_d(this->l_slice, 0.)) ; + this->compute_references() ; + +} + +ReferenceComputer::~ReferenceComputer() +{ ; } + diff --git a/src/Clustering/ReferenceComputer.hpp b/src/Clustering/ReferenceComputer.hpp new file mode 100644 index 0000000..b843984 --- /dev/null +++ b/src/Clustering/ReferenceComputer.hpp @@ -0,0 +1,56 @@ +#ifndef REFERENCECOMPUTER_HPP +#define REFERENCECOMPUTER_HPP + +#include + +#include +#include + +/*! + * \brief The ReferenceComputer class is a wrapper around the + * EMEngine class that allows to compute the class references + * given the posterior probability matrix and the data without + * having to re-run the data classification. + * + * This class is typically made to be used in conjunction with + * an EMEngine instance, using the following pattern : + * + * Matrix2D data = ... ; + * EMEngine em(data, ...) ; + * em.cluster() ; + * auto prob = em.get_posterior_prob() ; + * auto ref = ReferenceComputer(data, prob, ...).get_references() ; + */ +class ReferenceComputer : public EMEngine +{ + public: + + ReferenceComputer() = delete ; + + /*! + * \brief Constructs an obect and computes the references. + * \param the data for which the classification probabilities + * have been generated. + * \param the classification probabilities for the given data, as + * return by an EMEngine instance (see above). + * \param n_threads the number of threads dedicated to the + * computations. + */ + ReferenceComputer(const Matrix2D& data, + const Matrix4D& posterior_prob, + size_t n_threads) ; + + /*! + * \brief Destructor. + */ + virtual ~ReferenceComputer() override ; + + // removes the following methods from the public interface to restrict it + private: + using EMEngine::get_loglikelihood ; + using EMEngine::cluster ; + +} ; + + +#endif // REFERENCECOMPUTER_HPP diff --git a/src/Clustering/typedef.hpp b/src/Clustering/typedef.hpp new file mode 100644 index 0000000..231fd50 --- /dev/null +++ b/src/Clustering/typedef.hpp @@ -0,0 +1,11 @@ +#ifndef TYPEDEFCLUSTERING_HPP +#define TYPEDEFCLUSTERING_HPP + +typedef std::vector v_i ; +typedef std::vector v_d ; +typedef std::vector matrix2d_i ; +typedef std::vector matrix2d_d ; +typedef std::vector matrix3d_d ; +typedef std::vector matrix4d_d ; + +#endif // TYPEDEFCLUSTERING_HPP diff --git a/src/Config.hpp b/src/Config.hpp new file mode 100644 index 0000000..8d5a774 --- /dev/null +++ b/src/Config.hpp @@ -0,0 +1,4 @@ +// the configured options and settings for Tutorial + +#define scATACseq_VERSION_MAJOR @scATACseq_VERSION_MAJOR@ +#define scATACseq_VERSION_MINOR @scATACseq_VERSION_MINOR@ diff --git a/src/GUI/ConsoleProgressBar.cpp b/src/GUI/ConsoleProgressBar.cpp new file mode 100644 index 0000000..3e182fc --- /dev/null +++ b/src/GUI/ConsoleProgressBar.cpp @@ -0,0 +1,32 @@ +#include "ConsoleProgressBar.hpp" + +ConsoleProgressBar::ConsoleProgressBar(std::ostream& stream, size_t repeats, size_t size, const std::string& prefix) + : _repeats(repeats), _size(size), _current(0), _prefix(prefix), _stream(stream) +{} + + +void ConsoleProgressBar::display() const +{ + char bar_repr[4096] ; + + double perc = 0. ; + if(this->_current == this->_repeats) + { perc = 100. ; } + else + { perc = static_cast(this->_current) / static_cast(this->_repeats) * 100 ; } + size_t x = this->_size * this->_current / this->_repeats ; + std::string bar ; + for(size_t i=0; i_size; i++) { bar += "." ; } + + sprintf(bar_repr, "%s : progress [%s] %.2f %%\r", this->_prefix.c_str(), bar.c_str(), perc) ; + this->_stream << bar_repr ; + this->_stream.flush() ; +} + + +void ConsoleProgressBar::update() +{ this->display() ; + if(this->_current < this->_repeats) + { this->_current++ ; } +} diff --git a/src/GUI/ConsoleProgressBar.hpp b/src/GUI/ConsoleProgressBar.hpp new file mode 100644 index 0000000..5a2eb78 --- /dev/null +++ b/src/GUI/ConsoleProgressBar.hpp @@ -0,0 +1,72 @@ +#ifndef CONSOLEPROGRESSBAR_HPP +#define CONSOLEPROGRESSBAR_HPP + +#include +#include + +#include "Diplayable.hpp" +#include "Updatable.hpp" + +/*! + * \brief The ConsoleProgressBar class displays a progress bar on a given + * stream to illustrate the progress of a process. + * Exemple : ConsoleProgrssBar(cout, 100, 10, "sending") + * after calling update() 30x should display + * on std::cout : + * sending : progress [===.......] 30.00 % + * + * 30% because 30x/100 + * + */ +class ConsoleProgressBar : public Displayable, public Updatable +{ + public : + ConsoleProgressBar() = delete ; + /*! + * \brief Constructor. Constructs an empty bar. + * \param stream the stream to display the bar on. + * \param repeats the maximal number of times a piece of the process needs to be + * repeated for the whole process to be done (and the progress bar to reach 100%). + * \param size the length of the progress bar, in characters on the stream (not + * accounting for some text on both sides. + * \param prefix a short piece of text describing what the progress bar is + * tracking, for information purposes only. + */ + ConsoleProgressBar(std::ostream& stream, size_t repeats, size_t size, const std::string& prefix) ; + + /*! + * \brief produces a display representation of the bar and sends it to stream. + */ + virtual void display() const override ; + /*! + * \brief calls display() and updates the state of the bar by 1 (this method + * can now be called one time less than before the call before is reached + * 100%). + */ + virtual void update() override ; + + private: + /*! + * \brief the number of times update() should be called before showing 100%. + */ + size_t _repeats ; + /*! + * \brief the size of the bar in number of characters when reaching 100%. + */ + size_t _size ; + /*! + * \brief the number of times update() was called so far. + */ + size_t _current ; + /*! + * \brief a brief description of what the bar tracks, to be displayed. + */ + std::string _prefix ; + /*! + * \brief a reference to the stream to display the bar representation to. + */ + std::ostream& _stream ; +} ; + + +#endif // CONSOLEPROGRESSBAR_HPP diff --git a/src/GUI/Diplayable.cpp b/src/GUI/Diplayable.cpp new file mode 100644 index 0000000..d77a402 --- /dev/null +++ b/src/GUI/Diplayable.cpp @@ -0,0 +1,4 @@ +#include "Diplayable.hpp" + +Displayable::~Displayable() +{} diff --git a/src/GUI/Diplayable.hpp b/src/GUI/Diplayable.hpp new file mode 100644 index 0000000..36b9e20 --- /dev/null +++ b/src/GUI/Diplayable.hpp @@ -0,0 +1,24 @@ +#ifndef DISPLAYABLE_HPP +#define DISPLAYABLE_HPP + +/*! + * \brief The Displayable class is an abstract class for classes meant + * to be displayed on the screen. + */ +class Displayable +{ + public: + /*! + * \brief Destructor. + */ + virtual ~Displayable() ; + + /*! + * \brief Should trigger the display of the instance + * on a given widget/window/stream. + */ + virtual void display() const = 0; +} ; + + +#endif // DISPLAYABLE_HPP diff --git a/src/GUI/Updatable.cpp b/src/GUI/Updatable.cpp new file mode 100644 index 0000000..c70e0b9 --- /dev/null +++ b/src/GUI/Updatable.cpp @@ -0,0 +1,4 @@ +#include "Updatable.hpp" + +Updatable::~Updatable() +{} diff --git a/src/GUI/Updatable.hpp b/src/GUI/Updatable.hpp new file mode 100644 index 0000000..c4380ab --- /dev/null +++ b/src/GUI/Updatable.hpp @@ -0,0 +1,23 @@ +#ifndef UPDATABLE_HPP +#define UPDATABLE_HPP + +/*! + * \brief The Updatable class is an abstract class for classes meant + * to be updated over time. + */ +class Updatable +{ + public: + /*! + * \brief Destructor. + */ + virtual ~Updatable() ; + + /*! + * \brief Should trigger the update of the instance. + */ + virtual void update() = 0; +} ; + + +#endif // UPDATABLE_HPP diff --git a/src/Matrix/Matrix.hpp b/src/Matrix/Matrix.hpp new file mode 100644 index 0000000..4f37a92 --- /dev/null +++ b/src/Matrix/Matrix.hpp @@ -0,0 +1,654 @@ +#ifndef MATRIX_HPP +#define MATRIX_HPP + + +#include +#include // accumulate() +#include +#include // setw(), setprecision(), fixed +#include // out_of_range, invalid_argument +#include // swap()f + + + +/*! + * \brief The Matrix class is a generic class to store data in a matrix. + * The matrix dimensionality can be any value : 1 is a vector, 2 is a regular + * 2D matrix, 3 is a 3D matrix, etc. + * + * In order to store the data properly and to perform all operations smoothly, the + * internal representation format differs from the "usual format". That is : the user + * provides coordinates as (x,y,z,...) where x referes to the row number, y to + * the column number, z the the z slice, etc. + * Internally however, x corresponds to the column number and y to the row number. + * Every other dimension has the same meaning. + * + * Internal representation : + * + * Here is an example of a 2x3 matrix (2D) + * + * {0,1,2,3,4,5} vector is turned to + * X + * ----------> + * 0 1 2 | + * 3 4 5 | Y + * \|/ + * + * dimensions are stored as {nx, ny} which corresponds to {ncol, nrow}. Coordinates + * are given using the universal format coord=(x,y) which are interpreted as {row, col}. + * Thus a simple swap(coord[0],coord[1]) should be performed to ensurethat the user given + * coordinates can be used in this referencial. + * + * + * Here is an example of a 2x3x2x2 matrix(4D) + * {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23} is turned to + * + * X + * -----------> | | + * 0 1 2 | | | + * 3 4 5 | Y | | + * \|/ | Z | + * 6 7 8 | | | + * 9 10 11 | Y | | + * \|/ \|/ | + * | A + * 12 13 14 | | | + * 15 16 17 | Y | | + * \|/ | Z | + * 18 19 20 | | | + * 21 22 23 | Y | | + * \|/ \|/ \|/ + * + * dimensions are stored as {nx, ny, nz, na} which corredponds to {ncol, nrow, nz, na}. + * Coordinates are given using the universal format coord=(x,y,z,a) which are interpreted + * as {row, col, z, a}. Thus a simple swap(coord[0],coord[1]) should be performed to ensure + * that the user given coordinates can be used in this referencial. + * + * + */ + +template +class Matrix +{ + public: + // constructors + Matrix() = default ; + /*! + * \brief Constructs an matrix with the given dimension with + * 0 values. + * \param dim the dimensions. + */ + Matrix(const std::vector& dim) ; + /*! + * \brief Constructs a matrix with the given dimensions and + * initialize the values to the given value. + * \param dim the dimensions. + * \param value the value to initialize the matrix content + * with. + */ + Matrix(const std::vector& dim, T value) ; + + /*! + * \brief Copy constructor. + * \param other the matrix to copy. + */ + Matrix (const Matrix& other) ; + + /*! + * \brief Destructor. + */ + virtual ~Matrix() = default ; + + // methods + /*! + * \brief Gets the element at the given offset. + * \param offset the offset of the element to get. + * \throw std::out_of_range exception if the offset + * is out of range. + * \return the element. + */ + T get(size_t offset) const throw(std::out_of_range) ; + + /*! + * \brief Gets the element at the given coordinates. + * \param coord the coordinates of the element to get. + * \throw std::out_of_range exception if the coordinates + * are out of range. + * \return the element. + */ + T get(const std::vector& coord) const throw(std::out_of_range) ; + + /*! + * \brief Sets the element at the given offset + * to the given value. + * \param offset the offset of the element to set. + * \param value the new value. + * \throw std::out_of_range exception if the offset + * is out of range. + */ + void set(size_t offset, T value) throw(std::out_of_range) ; + /*! + * \brief Sets the element at the given coordinates + * to the given value. + * \param coord the coordinates of the element to set. + * \param value the new value. + * \throw std::out_of_range exception if the coordinates + * are out of range. + */ + void set(const std::vector& coord, T value) throw(std::out_of_range) ; + + /*! + * \brief Gets the matrix dimensions. + * \return the dimensions. + */ + std::vector get_dim() const ; + + /*! + * \brief Gets the data vector. + * \return a a vector containing the data. + */ + std::vector get_data() ; + + /*! + * \brief Gets the number of dimensions (the length + * of the dimension vector). + * \return the number of dimensions + */ + size_t get_dim_size() const ; + + /*! + * \brief Gets the number of elements contained in the + * matrix. + * \return the number of element contained in the + * matrix. + */ + size_t get_data_size() const ; + + /*! + * \brief Returns the partial products of the dimensions. + * \return the partial products of the dimensions. + */ + std::vector get_dim_product() const ; + + /*! + * \brief Produces a nice representation of the matrix on the given + * stream. + * \param stream the stream. + * \param precision the rounding precision. + * \param width the column width in number of characters. + * \param sep the character separator. + */ + virtual void print(std::ostream& stram, size_t precision=4, size_t width=8, char sep=' ') const ; + + // operator + /*! + * \brief Assignment operator. + * \param other an other matrix to copy the values from. + * \return a reference to the current instance. + */ + Matrix& operator = (const Matrix& other) ; + + /*! + * \brief Adds value to each element. + * \param value the value to add. + * \return a reference to the instance. + */ + Matrix& operator += (T value) ; + + /*! + * \brief Substracts value to each element. + * \param value the value to substract. + * \return a reference to the instance. + */ + Matrix& operator -= (T value) ; + + /*! + * \brief Multiplies each element by value. + * \param value the value to multiply the elements by. + * \return a reference to the instance. + */ + Matrix& operator *= (T value) ; + + /*! + * \brief Divides each element by value. + * \param value the value to multiply the elements by. + * \throw std::invalid_argument if value is 0. + * \return a reference to the instance. + */ + Matrix& operator /= (T value) throw (std::invalid_argument) ; + + /*! + * \brief Comparison operator, returns true if + * both matrices are identical, that is do not + * have the same data and dimensions. + * \param other an other matrix. + * \return true if both matrices have the same + * data and dimensions. + */ + bool operator == (const Matrix& other) const ; + + /*! + * \brief Comparison operator, returns true if + * both matrices are different, that is do not + * have the same data and dimensions. + * \param other an other matrix. + * \return true if both matrices are different. + */ + bool operator != (const Matrix& other) const ; + + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param coord coord the coordinates of the element to get. + * \return a reference to this element. + */ + T& operator () (const std::vector& coord) ; + + /*! + * \brief Returns a const reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param coord coord the coordinates of the element to get. + * \return a const reference to this element. + */ + const T& operator () (const std::vector& coord) const ; + + protected: + // methods + /*! + * \brief Computes the partial dimension products and fills + * this->dim_prod according to the current values of + * this->_dim and this->dim_size. + */ + void compute_dim_product() ; + + /*! + * \brief Given a vector of at least 2 dimensional coordinates, + * it simply swaps the elements at index 0 (row number) and 1 + * (column number) to make them fit the x,y,... matrix + * reprensetation (x:number of columns, y:number of rows). + * \param coord a vector of coordinates (row, column, ...). + * \return a vector of coordinates corresponding to (x,y,...). + */ + std::vector swap_coord(const std::vector& coord) const ; + + /*! + * \brief Complementary function of convert_coord(). Given + * a vector of coordinates in (x,y,...) format, it turns it + * into (row,col,...) format. + * \param coord a vector of coordinates (x,y, ...). + * \return a vector of coordinates corresponding to (row,col,...). + */ + std::vector convert_coord_back(const std::vector& coord) const ; + + /*! + * \brief Checks whether a given offset is a valid offset or + * whether it is out of range. + * \param offset the offset to check. + * \return whether the offset is valid. + */ + bool is_valid(size_t offset) const ; + + /*! + * \brief Checks whether coordinates in (x,y,...) format are + * valid or whether they are out of range. + * \param offset the offset to check. + * \return whether the offset is valid. + */ + bool is_valid(const std::vector& coord) const ; + + /*! + * \brief Converts a vector of VALID (x,y,...) coordinates to a + * the corresponding offset allowing to get an element in the + * data vector. + * If the coordinate vector has a (row, column, ...) format, the + * result will be wrong. + * \param coord a vector of coordinates with (x,y,...) format. + * \return the corresponding offset. + */ + size_t convert_to_offset(const std::vector& coord) const ; + + /*! + * \brief Complementary function of convert_to_offset(). Given an + * offset, this function returns the corresponding coordinate + * vector in (x,y,...) format. + * \param offset a given offset. + * \return the corresponding vector of (x,y,..) coordinates. + */ + std::vector convert_to_coord(size_t offset) const ; + + // fields + /*! + * \brief The dimensions values. + */ + std::vector _dim ; + /*! + * \brief Stores the data. + */ + std::vector _data ; + /*! + * \brief The number of dimensions. + */ + size_t _dim_size ; + /*! + * \brief The number of data elements stored. + */ + size_t _data_size ; + + /*! + * \brief Contains the partial product of the dimensions. That is, + * the ith element contains the product of all the i-1 precedent + * dimensions : + * element 0 : 1, element 1 : x, element 2 : x*y, element 3 : x*y*z, + * and so one. + * This is used for coordinates to offset and offset to coordinates + * conversions. + */ + std::vector _dim_prod ; +} ; + +// operators +/*! + * \brief Addition operator. + * \param m the matrix of interest + * \param value the value to add to each element. + * \return the resulting matrix. + */ +template +const Matrix operator + (Matrix m, T value) +{ Matrix other(m) ; + other += value ; + return other ; +} + +/*! + * \brief Substraction operator + * \param m the matrix of interest. + * \param value the value to substract to each element. + * \return the resulting matrix. + */ +template +const Matrix operator - (Matrix m, T value) +{ Matrix other(m) ; + other -= value ; + return other ; +} + +/*! + * \brief Multiplication operator. + * \param m the matrix of interest. + * \param value the value to multiply each elements by. + * \return the resulting matrix. + */ +template +const Matrix operator * (Matrix m, T value) +{ Matrix other(m) ; + other *= value ; + return other ; +} + +/*! + * \brief Division operator. + * \param m the matrix of interest. + * \param value the value to divide each elements by. + * \throw std::invalid_argument if value is 0. + * \return the resulting matrix. + */ +template +const Matrix operator / (Matrix m, T value) throw (std::invalid_argument) +{ if(value == static_cast(0)) + { throw std::invalid_argument("division by 0!") ; } + Matrix other(m) ; + other /= value ; + return other ; +} + +/*! + * \brief Sends a representation of the matrix to the stream. + * \param stream the stream of interest. + * \param m the matrix of interest. + * \return a reference to the stream. + */ +template +std::ostream& operator << (std::ostream& stream, const Matrix& m) +{ m.print(stream) ; + return stream ; +} + + + +// method implementation +template +Matrix::Matrix(const std::vector& dim) + : Matrix(dim, 0) +{} + + +template +Matrix::Matrix(const std::vector& dim, T value) +{ this->_dim_size = dim.size() ; + this->_dim = this->swap_coord(dim) ; + this->_data_size = std::accumulate(dim.begin(), dim.end(), 1, std::multiplies()) ; + this->_data = std::vector(this->_data_size, value) ; + this->compute_dim_product() ; +} + +template +Matrix::Matrix(const Matrix &other) +{ *this = other ; } + + +template +T Matrix::get(size_t offset) const throw(std::out_of_range) +{ if(not this->is_valid(offset)) + { throw std::out_of_range("offset is out of range!") ; } + return this->_data[offset] ; +} + +template +T Matrix::get(const std::vector& coord) const throw(std::out_of_range) +{ std::vector coord_new = this->swap_coord(coord) ; + if(not this->is_valid(coord_new)) + { throw std::out_of_range("coordinates are out of range!") ; } + return this->_data[this->convert_to_offset(coord_new)] ; +} + + +template +void Matrix::set(size_t offset, T value) throw(std::out_of_range) +{ if(not this->is_valid(offset)) + { throw std::out_of_range("offset is out of range!") ; } + this->_data[offset] = value ; +} + +template +void Matrix::set(const std::vector& coord, T value) throw(std::out_of_range) +{ std::vector coord_new = this->swap_coord(coord) ; + if(not this->is_valid(coord_new)) + { throw std::out_of_range("coordinates are out of range!") ; } + this->_data[this->convert_to_offset(coord_new)] = value ; +} + + +template +std::vector Matrix::get_dim() const +{ return this->swap_coord(this->_dim) ; } + +template +std::vector Matrix::get_data() +{ return this->_data ; } + +template +size_t Matrix::get_dim_size() const +{ return this->_dim_size ; } + +template +size_t Matrix::get_data_size() const +{ return this->_data_size ; } + +template +std::vector Matrix::get_dim_product() const +{ return this->_dim_prod ; } + +template +void Matrix::print(std::ostream& stream, size_t precision, size_t width, char sep) const +{ stream.setf(std::ios::left) ; + stream << std::setprecision(precision) << std::fixed ; + for(size_t i=0; iget_data_size(); i++) + { stream << std::setw(width) << this->get(i) << sep ; } +} + +template +Matrix& Matrix::operator = (const Matrix& other) +{ + this->_dim = other._dim ; + this->_dim_size = other._dim_size ; + this->_data = other._data ; + this->_data_size = other._data_size ; + this->_dim_prod = other._dim_prod ; + return *this ; +} + +template +Matrix& Matrix::operator += (T value) +{ for(auto& i : this->_data) + { i += value ; } + return *this ; +} + +template +Matrix& Matrix::operator -= (T value) +{ for(auto& i : this->_data) + { i -= value ; } + return *this ; +} + +template +Matrix& Matrix::operator *= (T value) +{ for(auto& i : this->_data) + { i *= value ; } + return *this ; +} + +template +Matrix& Matrix::operator /= (T value) throw (std::invalid_argument) +{ + if(value == static_cast(0)) + { throw std::invalid_argument("division by 0!") ; } + + for(auto& i : this->_data) + { i /= value ; } + return *this ; +} + +template +bool Matrix::operator == (const Matrix& other) const +{ if(&other == this) + { return true ; } + // check dim + if(this->_dim_size != other._dim_size) + { return false ; } + for(size_t i=0; i_dim_size; i++) + { if(this->_dim[i] != other._dim[i]) + { return false ; } + } + // check data + if(this->_data_size != other._data_size) + { return false ; } + for(size_t i=0; i_data_size; i++) + { if(this->_data[i] != other._data[i]) + { return false ; } + } + return true ; +} + +template +bool Matrix::operator !=(const Matrix& other) const +{ return not ((*this) == other) ;} + +template +T& Matrix::operator () (const std::vector& coord) +{ std::vector coord_new = this->swap_coord(coord) ; + return this->_data[this->convert_to_offset(coord_new)] ; +} + +template +const T& Matrix::operator () (const std::vector& coord) const +{ std::vector coord_new = this->swap_coord(coord) ; + return this->_data[this->convert_to_offset(coord_new)] ; +} + + +template +void Matrix::compute_dim_product() +{ this->_dim_prod = std::vector(this->_dim_size, 0) ; + this->_dim_prod[0] = 1 ; + if(this->_dim_size > 1) + { this->_dim_prod[1] = this->_dim[0] ; } + if(this->_dim_size > 2) + { for(size_t i=2; i_dim_size; i++) + { this->_dim_prod[i] = this->_dim_prod[i-1]*this->_dim[i-1] ; } + } +} + + +template +std::vector Matrix::swap_coord(const std::vector &coord) const +{ std::vector coord_new = coord ; + // reformat coord = (row,col,...) = (y,y,...) into coord = (col,row,...) = (x,y,...) + if(this->_dim_size > 1) + { std::swap(coord_new[0], coord_new[1]) ; } + return coord_new ; +} + + +template +bool Matrix::is_valid(size_t offset) const +{ if(offset > this->_data_size-1) + { return false ; } + return true ; +} + +template +bool Matrix::is_valid(const std::vector& coord) const +{ if(coord.size() != this->_dim_size) + { return false ; } + for(size_t i=0; i this->_dim[i]) + { return false ; } + } + return true ; +} + + + +template +size_t Matrix::convert_to_offset(const std::vector& coord) const +{ size_t offset = 0 ; + + for(size_t i=0; i_dim_size; i++) + { offset += coord[i] * this->_dim_prod[i] ; } + + return offset ; +} + + +template +std::vector Matrix::convert_to_coord(size_t offset) const +{ + std::vector coord(this->_dim_size, 0) ; + + for(int i=this->_dim_size-1; i>=0; i--) + { size_t c = offset / this->_dim_prod[i] ; + coord[i] = c ; + offset -= (this->_dim_prod[i]*c) ; + } + + return coord ; +} + + + + +#endif // MATRIX_HPP diff --git a/src/Matrix/Matrix2D.hpp b/src/Matrix/Matrix2D.hpp new file mode 100644 index 0000000..2a532c0 --- /dev/null +++ b/src/Matrix/Matrix2D.hpp @@ -0,0 +1,481 @@ +#ifndef MATRIX2D_HPP +#define MATRIX2D_HPP + +#include + +#include +#include +#include // ifstream +#include +#include // setw(), setprecision(), fixed +#include // istringstream +#include // runtime_error, out_of_range + +#define BUFFER_SIZE 4096 + +/*! The Matrix2D class is a specialisation of the Matrix + * class to make work with 2D matrices easier. + * + * A text format is defined to store such matrices. + * In this format, each row is written on a single line + * and the values should separated by any blank character + * (tab, space, multiple spaces, ...). Empty lines are + * not allowed. + * + * ---- start ---- + * 1 2 3 + * 4 5 6 + * 7 8 9 + * ----- end ----- + * + * Constructing a matrix from an empty file (0 bytes or only an EOL char) returns a null + * matrix (0x0 dimensions). Writting a null matrix (that is with at least one null + * dimension creates an empty file. + * + */ +template +class Matrix2D : public Matrix +{ + public: + // constructors + Matrix2D() = default ; + /*! + * \brief Constructs a matrix with the given dimensions, + * filled with 0 values. + * \param nrow the number of rows. + * \param ncol the number of columns. + */ + Matrix2D(size_t nrow, size_t ncol) ; + /*! + * \brief Constructs a matrix with the given dimensions and + * initialize the values to the given value. + * \param nrow the number of rows. + * \param ncol the number of columns. + * \param value the value to initialize the matrix content + * with. + */ + Matrix2D(size_t nrow, size_t ncol, T value) ; + /*! + * \brief Copy constructor + * \param other the matrix to copy the content from. + */ + Matrix2D(const Matrix2D& other) ; + /*! + * \brief Constructs a matrix from a text file. A matrix contructed + * from an empty file (or a file containing only one EOL char) returns + * an empty matrix (null dimensions). + * \param file_address the address of the file containing the matrix. + * \throw std::runtime_error if anything happen while reading the + * file (format error, file not found, etc). + */ + Matrix2D(const std::string& file_address) throw (std::runtime_error) ; + + /*! + * \brief Destructor. + */ + virtual ~Matrix2D() = default ; + + // methods overloaded in Matrix + using Matrix::get ; + using Matrix::set ; + + // methods + /*! + * \brief Gets the element at the given coordinates. + * \param row the row number of the element to set. + * \param col the column number of the element to set. + * \throw std::out_of_range exception if the coordinates + * are out of range. + * \return the element. + */ + T get(size_t row, size_t col) const throw(std::out_of_range) ; + /*! + * \brief Sets the element at the given coordinates + * to the given value. + * \param row the row number of the element to set. + * \param col the column number of the element to set. + * \param value the new value. + * \throw std::out_of_range exception if the coordinates + * are out of range. + */ + void set(size_t row, size_t col, T value) throw (std::out_of_range) ; + + /*! + * \brief Gets the number of rows. + * \return the number of rows. + */ + size_t get_nrow() const ; + /*! + * \brief Gets the number of columns. + * \return the number of columns. + */ + size_t get_ncol() const ; + + /*! + * \brief Gets the values in the i-th row. + * \param i the row of interest. + * \throw std::out_of_range if i is out of range. + * \return the values in this row. + */ + std::vector get_row(size_t i) const throw (std::out_of_range) ; + /*! + * \brief Gets the values in the i-th column. + * \param i the column of interest. + * \throw std::out_of_range if i is out of range. + * \return the values in this column. + */ + std::vector get_col(size_t i) const throw (std::out_of_range) ; + + /*! + * \brief Sets the values of a given rows with the values of a given + * vector. + * \param i the row of interest. + * \param values the new values. + * \throw std::out_of_range if i is out of range. + * \throw std::invalid_argument if values does not have a length equal + * to the number of columns of the matrix. + */ + void set_row(size_t i, const std::vector& values) throw (std::out_of_range, std::invalid_argument) ; + /*! + * \brief Sets the values of a given column with the values of a given + * vector. + * \param i the column of interest. + * \param values the new values. + * \throw std::out_of_range if i is out of range. + * \throw std::invalid_argument if values does not have a length equal + * to the number of rows of the matrix. + */ + void set_col(size_t i, const std::vector& values) throw (std::out_of_range, std::invalid_argument) ; + + /*! + * \brief Produces a nice representation of the matrix on the given + * stream. + * \param stream the stream. + * \param precision the rounding precision. + * \param width the column width in number of characters. + * \param sep the character separator. + */ + virtual void print(std::ostream& stram, size_t precision=4, size_t width=8, char sep=' ') const override ; + + // operators + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param row the row number of the element to set. + * \param col the column number of the element to set. + * \return a reference to this element. + */ + T& operator () (size_t row, size_t col) ; + + /*! + * \brief Returns a const reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param row the row number of the element to set. + * \param col the column number of the element to set. + * \return a const reference to this element. + */ + const T& operator () (size_t row, size_t col) const ; + +} ; + +// operators +/*! + * \brief Addition operator. + * \param m the matrix of interest + * \param value the value to add to each element. + * \return the resulting matrix. + */ +template +const Matrix2D operator + (Matrix2D m, T value) +{ Matrix2D other(m) ; + m += value ; + return m ; +} + +/*! + * \brief Substraction operator + * \param m the matrix of interest. + * \param value the value to substract to each element. + * \return the resulting matrix. + */ +template +const Matrix2D operator - (Matrix2D m, T value) +{ Matrix2D other(m) ; + m -= value ; + return m ; +} + +/*! + * \brief Multiplication operator. + * \param m the matrix of interest. + * \param value the value to multiply each elements by. + * \return the resulting matrix. + */ +template +const Matrix2D operator * (Matrix2D m, T value) +{ Matrix2D other(m) ; + m *= value ; + return m ; +} + +/*! + * \brief Division operator. + * \param m the matrix of interest. + * \param value the value to divide each elements by. + * \throw std::invalid_argument if value is 0. + * \return the resulting matrix. + */ +template +const Matrix2D operator / (Matrix2D m, T value) throw (std::invalid_argument) +{ if(value == static_cast(0)) + { throw std::invalid_argument("division by 0!") ; } + Matrix2D other(m) ; + other /= value ; + return other ; +} + +/*! + * \brief Sends a representation of the matrix to the stream. + * \param stream the stream of interest. + * \param m the matrix of interest. + * \return a reference to the stream. + */ +template +std::ostream& operator << (std::ostream& stream, const Matrix2D& m) +{ m.print(stream) ; + return stream ; +} + +// other usefull functions +/*! + * \brief Produces a transpose of the given matrix. + * \param m a matrix. + */ +template +Matrix2D transpose(const Matrix2D& m) ; + + +// method implementation +template +Matrix2D transpose(const Matrix2D& m) +{ std::vector dim = m.get_dim() ; + size_t nrow = dim[0] ; + size_t ncol = dim[1] ; + Matrix2D m2(ncol, nrow, 0) ; + for(size_t i=0; i +Matrix2D::Matrix2D(size_t nrow, size_t ncol) + : Matrix2D(nrow, ncol, static_cast(0)) +{} + +template +Matrix2D::Matrix2D(size_t nrow, size_t ncol, T value) + : Matrix({nrow, ncol}, value) +{} + +template +Matrix2D::Matrix2D(const Matrix2D& other) + : Matrix(other) +{} + +template +Matrix2D::Matrix2D(const std::string &file_address) throw (std::runtime_error) +// : Matrix({0,0}) +{ + this->_dim = {0,0} ; + this->_data = std::vector() ; + this->_dim_size = this->_dim.size() ; + this->_data_size = this->_data.size() ; + this->_dim_prod = std::vector(this->_dim_size, 0) ; + + std::ifstream file(file_address, std::ifstream::in) ; + if(file.fail()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! cannot open %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + std::string buffer_str ; + std::vector buffer_vec ; + T buffer_T ; + + // read file + size_t n_line = 0 ; + size_t row_len = 0 ; + + while(getline(file, buffer_str)) + { // check stream status and read content + if(file.fail()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + if(buffer_str.size() == 0) + { // this file only contains one eol char and should be considered as empty, + // -> returns empty matrix not an error + if(n_line == 0 and file.peek() == EOF and file.eof()) + { break ; } + + file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! while reading %s (empty line)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + // parse line + buffer_vec.clear() ; + std::istringstream buffer_ss(buffer_str) ; + while(buffer_ss >> buffer_T) + { buffer_vec.push_back(buffer_T) ; } + // check for an error which likely indicates that a value could not be + // casted into a type T (mixed data types in the file) + if(buffer_ss.fail() and not buffer_ss.eof()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! could not read a line in %s (incompatible data types)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + // check that number of column is constant + if(n_line == 0) + { row_len = buffer_vec.size() ; } + else if(buffer_vec.size() != row_len) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! variable number of columns in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + // update matrix content + for(auto i : buffer_vec) + { this->_data.push_back(i) ; + this->_data_size++ ; + } + this->_dim[1]++ ; + n_line++ ; + } + file.close() ; + + this->_dim[0] = row_len ; + this->compute_dim_product() ; +} + + + +template +T Matrix2D::get(size_t row, size_t col) const throw(std::out_of_range) +{ try + { return this->get({row, col}) ; } + catch(std::out_of_range& e) + { throw e ; } +} + + +template +void Matrix2D::set(size_t row, size_t col, T value) throw(std::out_of_range) +{ try + { this->set({row, col}, value) ; } + catch(std::out_of_range& e) + { throw e ; } +} + + +template +size_t Matrix2D::get_nrow() const +{ return this->_dim[1] ; } + + +template +size_t Matrix2D::get_ncol() const +{ return this->_dim[0] ; } + + +template +std::vector Matrix2D::get_row(size_t i) const throw (std::out_of_range) +{ if(i>=this->get_nrow()) + { throw std::out_of_range("row index is out of range!") ; } + + std::vector row(this->get_ncol()) ; + for(size_t j=i*this->get_ncol(), n=0; nget_ncol(); j++, n++) + { row[n] = this->_data[j] ; } + + return row ; +} + + +template +std::vector Matrix2D::get_col(size_t i) const throw (std::out_of_range) +{ if(i>=this->get_ncol()) + { throw std::out_of_range("column index is out of range!") ; } + + std::vector col(this->get_nrow()) ; + for(size_t j=i, n=0; nget_nrow(); j+=this->get_ncol(), n++) + { col[n] = this->_data[j] ; } + + return col ; +} + + +template +void Matrix2D::set_row(size_t i, const std::vector& values) throw (std::out_of_range, std::invalid_argument) +{ if(i>=this->get_nrow()) + { throw std::out_of_range("row index is out of range!") ; } + else if(values.size() != this->get_ncol()) + { throw std::invalid_argument("the given vector length is not equal to the number of columns!") ; } + + for(size_t j=i*this->get_ncol(), n=0; nget_ncol(); j++, n++) + { this->_data[j] = values[n] ; } +} + + +template +void Matrix2D::set_col(size_t i, const std::vector& values) throw (std::out_of_range, std::invalid_argument) +{ if(i>=this->get_ncol()) + { throw std::out_of_range("row index is out of range!") ; } + else if(values.size() != this->get_nrow()) + { throw std::invalid_argument("the given vector length is not equal to the number of rows!") ; } + + for(size_t n=0, j=i; nget_nrow(); n++, j+=this->get_ncol()) + { this->_data[j] = values[n] ; } +} + +template +void Matrix2D::print(std::ostream& stream, size_t precision, size_t width, char sep) const +{ stream.setf(std::ios::left) ; + stream << std::setprecision(precision) << std::fixed ; + + size_t n = 0 ; + size_t n_tot = this->get_nrow()*this->get_ncol() ; + + for(size_t i=0; iget_nrow(); i++) + { for(size_t j=0; jget_ncol(); j++, n++) + { stream << std::setw(width) << (*this)(i,j) << sep ; } + if(n +T& Matrix2D::operator () (size_t row, size_t col) +{ std::vector coord = {col, row} ; + return this->_data[this->convert_to_offset(coord)] ; +} + + +template +const T& Matrix2D::operator () (size_t row, size_t col) const +{ std::vector coord = {col, row} ; + return this->_data[this->convert_to_offset(coord)] ; +} + + +#endif // MATRIX2D_HPP + + diff --git a/src/Matrix/Matrix3D.hpp b/src/Matrix/Matrix3D.hpp new file mode 100644 index 0000000..5fc6572 --- /dev/null +++ b/src/Matrix/Matrix3D.hpp @@ -0,0 +1,444 @@ +#ifndef MATRIX3D_HPP +#define MATRIX3D_HPP + +#include + +#include +#include +#include +#include // setw(), setprecision(), fixed +#include // ifstream +#include // istringstream +#include // runtime_error, out_of_range +#include // equal() + +#define BUFFER_SIZE 4096 + +/*! + * The Matrix3D class is a specialisation of the Matrix + * class to make work with 3D matrices more easily. + * + * A text file format is defined to store such matrices. The specifications are as + * follows : + * Absolutely NO empty lines are allowed! + * The following lines should contain : + * + * 1st line : a slice header, ',,0' indicates that a slice of the 3rd dimension + * is beginning (this is a z slice). + * 2nd - Nth line : the firt slice, as a 2d matrix (the exemple below has dimensions 3x4). + * N+1th line : a slice header, ',,1' indicates that the 2nd slice is beginning. + * N+1th - ... : the second slice + * and so on... + * + * Example of a 3x4x2 3D matrix + * ---- start ---- + * ,,0 + * 1 2 3 4 + * 5 6 7 8 + * 8 9 10 11 + *,,1 + * 12 13 14 15 + * 16 17 18 19 + * 20 21 22 23 + * ----- end ----- + * + * Constructing a matrix from an empty file (0 bytes or only an EOL char) returns a null + * matrix (0x0x0 dimensions). Writting a null matrix (that is with at least one null + * dimension creates an empty file. + * + */ +template +class Matrix3D : public Matrix +{ + public: + // constructors + Matrix3D() = default ; + /*! + * \brief Constructs a matrix with the given dimensions, + * filled with 0 values. + * \param dim1 the first dimension. + * \param dim2 the second dimension. + * \param dim3 the third dimension. + */ + Matrix3D(size_t dim1, size_t dim2, size_t dim3) ; + /*! + * \brief Constructs a matrix with the given dimensions and + * initialize the values to the given value. + * \param dim1 the first dimension. + * \param dim2 the second dimension. + * \param dim3 the third dimension. + * \param value the value to initialize the matrix content + * with. + */ + Matrix3D(size_t dim1, size_t dim2, size_t dim3, T value) ; + /*! + * \brief Copy constructor + * \param other the matrix to copy the content from. + */ + Matrix3D(const Matrix3D& other) ; + /*! + * \brief Constructs a matrix from a text file. A matrix contructed + * from an empty file (or a file containing only one EOL char) returns + * an empty matrix (null dimensions). + * \param file_address the address of the file containing the matrix. + * \throw std::runtime_error if anything happen while reading the + * file (format error, file not found, etc). + */ + Matrix3D(const std::string& file_address) throw (std::runtime_error) ; + + /*! + * \brief Destructor. + */ + virtual ~Matrix3D() = default ; + + // methods overloaded from Matrix + using Matrix::get ; + using Matrix::set ; + + // methods + /*! + * \brief Gets the element at the given coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \throw std::out_of_range exception if the coordinates + * are out of range. + * \return the element. + */ + T get(size_t dim1, size_t dim2, size_t dim3) const throw (std::out_of_range) ; + /*! + * \brief Sets the element at the given coordinates + * to the given value. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param value the new value. + * \throw std::out_of_range exception if the coordinates + * are out of range. + */ + void set(size_t dim1, size_t dim2, size_t dim3, T value) throw (std::out_of_range) ; + + /*! + * \brief Produces a nice representation of the matrix on the given + * stream. + * \param stream the stream. + * \param precision the rounding precision. + * \param width the column width in number of characters. + * \param sep the character separator. + */ + virtual void print(std::ostream& stream, size_t precision=4 ,size_t width=8, char sep=' ') const override ; + + // operators + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \return a reference to this element. + */ + T& operator() (size_t dim1, size_t dim2, size_t dim3) ; + /*! + * \brief Returns a constant reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \return a constant reference to this element. + */ + const T& operator() (size_t dim1, size_t dim2, size_t dim3) const ; + + private: + // methods + /*! + * \brief Checks whether a given string is a slice header + * (such as ",,0"), as found in files storing Matrix3D. + * \param str the string to check. + * \return whether the string is a slice header. + */ + bool is_header(const std::string& str) const ; + +} ; + +// operators +/*! + * \brief Addition operator. + * \param m the matrix of interest + * \param value the value to add to each element. + * \return the resulting matrix. + */ +template +const Matrix3D operator + (Matrix3D m, T value) +{ Matrix3D other(m) ; + m += value ; + return m ; +} + +/*! + * \brief Substraction operator + * \param m the matrix of interest. + * \param value the value to substract to each element. + * \return the resulting matrix. + */ +template +const Matrix3D operator - (Matrix3D m, T value) +{ Matrix3D other(m) ; + m -= value ; + return m ; +} + +/*! + * \brief Multiplication operator. + * \param m the matrix of interest. + * \param value the value to multiply each elements by. + * \return the resulting matrix. + */ +template +const Matrix3D operator * (Matrix3D m, T value) +{ Matrix3D other(m) ; + m *= value ; + return m ; +} + +/*! + * \brief Division operator. + * \param m the matrix of interest. + * \param value the value to divide each elements by. + * \throw std::invalid_argument if value is 0. + * \return the resulting matrix. + */ +template +const Matrix3D operator / (Matrix3D m, T value) throw (std::invalid_argument) +{ if(value == static_cast(0)) + { throw std::invalid_argument("division by 0!") ; } + Matrix3D other(m) ; + other /= value ; + return other ; +} + +/*! + * \brief Sends a representation of the matrix to the stream. + * \param stream the stream of interest. + * \param m the matrix of interest. + * \return a reference to the stream. + */ +template +std::ostream& operator << (std::ostream& stream, const Matrix3D& m) +{ m.print(stream) ; + return stream ; +} + + + +// method implementation +template +Matrix3D::Matrix3D(size_t dim1, size_t dim2, size_t dim3) + : Matrix3D(dim1, dim2, dim3, 0) +{} + +template +Matrix3D::Matrix3D(size_t dim1, size_t dim2, size_t dim3, T value) + : Matrix({dim1, dim2, dim3}, value) +{} + +template +Matrix3D::Matrix3D(const Matrix3D &other) + : Matrix(other) +{} + + +template +Matrix3D::Matrix3D(const std::string &file_address) throw (std::runtime_error) +{ + this->_dim = {0,0,0} ; + this->_data = std::vector() ; + this->_dim_size = this->_dim.size() ; + this->_data_size = this->_data.size() ; + this->_dim_prod = std::vector(this->_dim_size, 0) ; + + std::ifstream file(file_address, std::ifstream::in) ; + if(file.fail()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! cannot open %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + std::string buffer_str ; + std::vector buffer_vec ; + T buffer_T ; + + // read file + size_t n_line = 0, n_line_data = 0 ; // number of line and of data line read + size_t row_len = 0, col_len = 0 ; // length of row and column in nber of values + size_t row_len_cur = 0, col_len_cur = 0 ; // current number of values read in row and col + + while(getline(file, buffer_str)) + { if(file.fail()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + // check empty line + if(buffer_str.size() == 0) + { // this file only contains one eol char and should be considered as empty, + // -> returns empty matrix not an error + if(n_line == 0 and file.peek() == EOF and file.eof()) + { break ; } + + file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! while reading %s (empty line)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + + // check whether it is the beginning of a slice + // 1st line in file should be one like this + if(this->is_header(buffer_str)) + { // check that slice have a constant number of rows + if(this->_dim[2] == 1) + { col_len = col_len_cur ; + // this->_dim[0] = row_len ; + // this->_dim[1] = col_len ; + } + else if(col_len_cur != col_len) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions 1 in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + this->_dim[2]++ ; + col_len_cur = 0 ; + n_line++ ; + continue ; + } + // 1st line in file should be a header and entering + // this block is forbidden + if(n_line == 0) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! first line is not a slice header in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + // parse line + row_len_cur = 0 ; + buffer_vec.clear() ; + std::istringstream buffer_ss(buffer_str) ; + while(buffer_ss >> buffer_T) + { buffer_vec.push_back(buffer_T) ; + row_len_cur++ ; + } + // check for an error which likely indicates that a value could not be + // casted into a type T (mixed data types in the file) + if(buffer_ss.fail() and not buffer_ss.eof()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! could not read a line in %s (incompatible data types)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + // check that number of column is constant + if(n_line_data == 0) + { row_len = row_len_cur ; } + else if(row_len_cur != row_len) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions 2 in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + // update matrix content + for(auto i : buffer_vec) + { this->_data.push_back(i) ; + this->_data_size++ ; + } + col_len_cur++ ; + n_line_data++ ; + n_line++ ; + // update matrix dimensions + this->_dim[0] = row_len_cur ; + this->_dim[1] = col_len_cur ; + } + // check dimensions of last slice + if(col_len_cur != this->_dim[1]) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + file.close() ; + this->compute_dim_product() ; +} + + +template +T Matrix3D::get(size_t dim1, size_t dim2, size_t dim3) const throw(std::out_of_range) +{ try + { return this->get({dim1, dim2, dim3}) ; } + catch(std::out_of_range& e) + { throw e ; } +} + +template +void Matrix3D::set(size_t dim1, size_t dim2, size_t dim3, T value) throw(std::out_of_range) +{ try + { return this->set({dim1, dim2, dim3}, value) ; } + catch(std::out_of_range& e) + { throw e ; } +} + + +template +T& Matrix3D::operator () (size_t dim1, size_t dim2, size_t dim3) +{ std::vector coord = {dim2, dim1, dim3} ; + return this->_data[this->convert_to_offset(coord)] ; +} + + +template +void Matrix3D::print(std::ostream& stream, size_t precision, size_t width, char sep) const +{ // if the matrix has at least one 0 dimension (no data), don't do anything + if(this->_dim[0]==0 or this->_dim[1]==0 or this->_dim[2]==0) + { return ; } + + stream.setf(std::ios::left) ; + stream << std::setprecision(precision) << std::fixed ; + std::vector dim = this->get_dim() ; + + size_t n = 0 ; + size_t n_tot = std::accumulate(dim.begin(), dim.end(), 1, std::multiplies()) ; + + for(size_t z=0; z +const T& Matrix3D::operator () (size_t dim1, size_t dim2, size_t dim3) const +{ std::vector coord = {dim2, dim1, dim3} ; + return this->_data[this->convert_to_offset(coord)] ; +} + + +template +bool Matrix3D::is_header(const std::string& str) const +{ if(str[0] == ',' and + str[1] == ',' and + str.find(',', 2) == std::string::npos) + { return true ; } + return false ; +} + +#endif // MATRIX3D_HPP diff --git a/src/Matrix/Matrix4D.hpp b/src/Matrix/Matrix4D.hpp new file mode 100644 index 0000000..d9cb111 --- /dev/null +++ b/src/Matrix/Matrix4D.hpp @@ -0,0 +1,594 @@ +#ifndef MATRIX4D_HPP +#define MATRIX4D_HPP + +#include + +#include +#include +#include // runtime_error, out_of_range +#include +#include // setw(), setprecision(), fixed +#include // ifstream +#include // sstream + + +#define BUFFER_SIZE 4096 + + +/*! + * The Matrix4D class is a specialisation of the Matrix + * class to make work with 4D matrices more easily. + * + * A text file format is defined to store such matrices. The specifications are as + * follows : + * Absolutely NO empty lines are allowed! + * The following lines should contain : + * + * 1st line : a slice header ',,,0' indicating that a slice of the 4th dimension + * is beginning. + * 3nd - Nth line : the slice of the 4th dimension. It contains slice in the 3rd dimension + * which are 2D matrices separated by headers (',,0' and ',,1', in the + * example below, they have 2x3 dimensions). + * N+1th line : ',,,1' indicating that the 2nd slice of the 4th dimension is beginning. + * and so on... + * Example + * ---- start ---- + * ,,,0 + * ,,0 + * 1 2 3 + * 4 5 6 + * ,,1 + * 7 8 9 + * 10 11 12 + * ,,,1 + * ,,0 + * 21 22 23 + * 24 25 26 + * ,,1 + * 27 28 29 + * 30 31 32 + * ----- end ----- + * + * Constructing a matrix from an empty file (0 bytes or only an EOL char) returns a null + * matrix (0x0x0x0 dimensions). Writting a null matrix (that is with at least one null + * dimension creates an empty file. + * + */ +template +class Matrix4D : public Matrix +{ + public: + // constructors + Matrix4D() = default ; + /*! + * \brief Constructs a matrix with the given dimensions, + * filled with 0 values. + * \param dim1 the first dimension. + * \param dim2 the second dimension. + * \param dim3 the third dimension. + * \param dim4 the fourth dimension. + */ + Matrix4D(size_t dim1, size_t dim2, size_t dim3, size_t dim4) ; + /*! + * \brief Constructs a matrix with the given dimensions and + * initialize the values to the given value. + * \param dim1 the first dimension. + * \param dim2 the second dimension. + * \param dim3 the third dimension. + * \param dim4 the fourth dimension. + * \param value the value to initialize the matrix content + * with. + */ + Matrix4D(size_t dim1, size_t dim2, size_t dim3, size_t dim4, T value) ; + /*! + * \brief Copy constructor + * \param other the matrix to copy the content from. + */ + Matrix4D(const Matrix4D& other) ; + /*! + * \brief Constructs a matrix from a text file. A matrix contructed + * from an empty file (or a file containing only one EOL char) returns + * an empty matrix (null dimensions). + * \param file_address the address of the file containing the matrix. + * \throw std::runtime_error if anything happen while reading the + * file (format error, file not found, etc). + */ + Matrix4D(const std::string& file_address) throw (std::runtime_error) ; + + /*! + * \brief Destructor. + */ + virtual ~Matrix4D() = default ; + + // methods overloaded from Matrix + using Matrix::get ; + using Matrix::set ; + + // methods OK + /*! + * \brief Gets the element at the given coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param dim4 the fourth dimension coordinate. + * \throw std::out_of_range exception if the coordinates + * are out of range. + * \return the element. + */ + T get(size_t dim1, size_t dim2, size_t dim3, size_t dim4) const throw (std::out_of_range) ; + /*! + * \brief Sets the element at the given coordinates + * to the given value. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param dim4 the fourth dimension coordinate. + * \param value the new value. + * \throw std::out_of_range exception if the coordinates + * are out of range. + */ + void set(size_t dim1, size_t dim2, size_t dim3, size_t dim4, T value) throw (std::out_of_range) ; + /*! + * \brief Produces a nice representation of the matrix on the given + * stream. + * \param stream the stream. + * \param precision the rounding precision. + * \param width the column width in number of characters. + * \param sep the character separator. + */ + virtual void print(std::ostream& stream, size_t precision=4 ,size_t width=8, char sep=' ') const override ; + + // operators OK + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param dim4 the third dimension coordinate. + * \return a reference to this element. + */ + T& operator() (size_t dim1, size_t dim2, size_t dim3, size_t dim4) ; + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param dim4 the third dimension coordinate. + * \return a reference to this element. + */ + const T& operator() (size_t dim1, size_t dim2, size_t dim3, size_t dim4) const ; + + private: + // methods + /*! + * \brief Checks whether a given string is a 3D header + * (such as ",,0"), as found in files storing Matrix4D. + * \param str the string to check. + * \return whether the string is such a slice header. + */ + bool is_header_3d(const std::string& str) const ; + /*! + * \brief Checks whether a given string is a 4D header + * (such as ",,,0"), as found in files storing Matrix4D. + * \param str the string to check. + * \return whether the string is such a slice header. + */ + bool is_header_4d(const std::string& str) const ; + + /*! + * \brief Routine to load 4D matrices from files. + * This method reads from a std::ifstream object, + * from the current pointer location until i) a 4D + * header line is found (such as ',,,1') or ii) until + * it cannot read anymore from the stream. All + * data are pushed back into the data vector and + * the dimensions of the data read are stored into + * the dim vector (these data are actually a 3D + * matrix). If the method returned because it + * found another 4D header, it returns true, false + * otherwise. + * To read an entire 4D matrix from a file, simply + * use this scheme : i) read the 1st 4D header + * ii) call this function while it returns true. + * \param file_name a reference to a string containing + * the address of the file currently read (for exception + * messages). + * \param file a reference to the std::ifstream to read + * from. Obviously, the stream state will be modified as + * the method reads from it. However, it will never be + * closed by the method. + * \param data a reference to an empty vector where the + * read data will be pushed back. + * \param dim a reference to an empty vector where the + * dimensions of the read data will be stored. + * \return whether the last piece of data read from the + * stream was a 4D header. + */ + bool get_3d_slice(const std::string& file_name, std::ifstream& file, + std::vector& data, std::vector& dim) const throw (std::runtime_error) ; + +} ; + +// operators +/*! + * \brief Addition operator. + * \param m the matrix of interest + * \param value the value to add to each element. + * \return the resulting matrix. + */ +template +const Matrix4D operator + (Matrix4D m, T value) +{ Matrix4D other(m) ; + m += value ; + return m ; +} + +/*! + * \brief Substraction operator + * \param m the matrix of interest. + * \param value the value to substract to each element. + * \return the resulting matrix. + */ +template +const Matrix4D operator - (Matrix4D m, T value) +{ Matrix4D other(m) ; + m -= value ; + return m ; +} + +/*! + * \brief Multiplication operator. + * \param m the matrix of interest. + * \param value the value to multiply each elements by. + * \return the resulting matrix. + */ +template +const Matrix4D operator * (Matrix4D m, T value) +{ Matrix4D other(m) ; + m *= value ; + return m ; +} + +/*! + * \brief Division operator. + * \param m the matrix of interest. + * \param value the value to divide each elements by. + * \throw std::invalid_argument if value is 0. + * \return the resulting matrix. + */ +template +const Matrix4D operator / (Matrix4D m, T value) throw (std::invalid_argument) +{ if(value == static_cast(0)) + { throw std::invalid_argument("division by 0!") ; } + Matrix4D other(m) ; + other /= value ; + return other ; +} + +/*! + * \brief Sends a representation of the matrix to the stream. + * \param stream the stream of interest. + * \param m the matrix of interest. + * \return a reference to the stream. + */ +template +std::ostream& operator << (std::ostream& stream, const Matrix4D& m) +{ m.print(stream) ; + return stream ; +} + + + +// method implementation +template +Matrix4D::Matrix4D(size_t dim1, size_t dim2, size_t dim3, size_t dim4) + : Matrix({dim1, dim2, dim3, dim4}, 0) +{} + +template +Matrix4D::Matrix4D(size_t dim1, size_t dim2, size_t dim3, size_t dim4, T value) + : Matrix({dim1, dim2, dim3, dim4}, value) +{} + +template +Matrix4D::Matrix4D(const Matrix4D &other) + : Matrix(other) +{} + +template +Matrix4D::Matrix4D(const std::string &file_address) throw (std::runtime_error) +{ this->_dim = {0,0,0,0} ; + this->_data = std::vector() ; + this->_dim_size = this->_dim.size() ; + this->_data_size = this->_data.size() ; + this->_dim_prod = std::vector(this->_dim_size, 0) ; + + std::ifstream file(file_address, std::ifstream::in) ; + if(file.fail()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! cannot open %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + std::string buffer_str ; + std::vector buffer_t ; + std::vector dim ; + + // read 1st line + getline(file, buffer_str) ; + + // empty line + if(buffer_str.size() == 0) + { // this file only contains one eol char and should be considered as empty, + // -> returns empty matrix not an error + if(file.peek() == EOF and file.eof()) + { file.close() ; + return ; + } + file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s (empty line)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + if(file.fail()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + bool found_4d_header = this->is_header_4d(buffer_str) ; + do + { if(file.fail()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + // check empty line + if(buffer_str.size() == 0) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s (empty line)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + // this is the beginning of a 3D slice -> get it using routine + if(found_4d_header) + { try + { // get slice + buffer_t.clear() ; + dim.clear() ; + found_4d_header = this->get_3d_slice(file_address, file, buffer_t, dim); + // update data + for(const auto& i : buffer_t) + { this->_data.push_back(i) ; + this->_data_size++ ; + } + // update dim only for the 1st slice (the 1st slice set the dimensions) + if(this->_dim[3] == 0) + { this->_dim[0] = dim[0] ; + this->_dim[1] = dim[1] ; + this->_dim[2] = dim[2] ; + } + // check dimensions of the slice + else + { if(dim[0] != this->_dim[0] or + dim[1] != this->_dim[1] or + dim[2] != this->_dim[2]) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + } + this->_dim[3]++ ; + } + catch(std::runtime_error& e) + { file.close() ; + throw e ; + } + } + // this is an error, everything between two ',,,N' header + // should be read at once. The only way out of the loop + // is that no more header has been read because of eof + else if(not found_4d_header and not file.eof()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + } while(found_4d_header) ; + + file.close() ; + this->compute_dim_product() ; +} + +template +T Matrix4D::get(size_t dim1, size_t dim2, size_t dim3, size_t dim4) const throw (std::out_of_range) +{ try + { return this->get({dim1, dim2, dim3, dim4}) ; } + catch(std::out_of_range& e) + { throw e ; } +} + +template +void Matrix4D::set(size_t dim1, size_t dim2, size_t dim3, size_t dim4, T value) throw (std::out_of_range) +{ try + { this->set({dim1, dim2, dim3, dim4}, value) ; } + catch(std::out_of_range& e) + { throw e ; } +} + +template +void Matrix4D::print(std::ostream &stream, size_t precision, size_t width, char sep) const +{ // if the matrix has at least one 0 dimension (no data), don't do anything + if(this->_dim[0]==0 or this->_dim[1]==0 or this->_dim[2]==0 or this->_dim[3]==0) + { return ; } + + stream.setf(std::ios::left) ; + stream << std::setprecision(precision) << std::fixed ; + std::vector dim = this->get_dim() ; + + size_t n = 0 ; + size_t n_tot = std::accumulate(dim.begin(), dim.end(), 1, std::multiplies()) ; + + for(size_t dim4=0; dim4 +T& Matrix4D::operator () (size_t dim1, size_t dim2, size_t dim3, size_t dim4) +{ std::vector coord = {dim2, dim1, dim3, dim4} ; + return this->_data[this->convert_to_offset(coord)] ; +} + +template +const T& Matrix4D::operator () (size_t dim1, size_t dim2, size_t dim3, size_t dim4) const +{ std::vector coord = {dim2, dim1, dim3, dim4} ; + return this->_data[this->convert_to_offset(coord)] ; +} + +template +bool Matrix4D::is_header_3d(const std::string &str) const +{ if(str[0] == ',' and + str[1] == ',' and + str.find(',', 2) == std::string::npos) + { return true ; } + return false ; +} + +template +bool Matrix4D::is_header_4d(const std::string &str) const +{ if(str[0] == ',' and + str[1] == ',' and + str[2] == ',' and + str.find(',', 3) == std::string::npos) + { return true ; } + return false ; +} + +template +bool Matrix4D::get_3d_slice(const std::string& file_name, std::ifstream& file, + std::vector &data, std::vector &dim) const throw (std::runtime_error) +{ + bool found_4d_header = false ; // the flag to return + + dim = {0,0,0} ; + + std::string buffer_str ; + std::vector buffer_vec ; + T buffer_T ; + + size_t n_line = 0, n_line_data = 0 ; // number of line and of data line read + size_t row_len = 0, col_len = 0 ; // length of row and column in nber of values + size_t row_len_cur = 0, col_len_cur = 0 ; // current number of values read in row and col + + while(getline(file, buffer_str)) + { if(file.fail()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + // check empty line + if(buffer_str.size() == 0) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s (empty line)", file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + // check whether this is the beginning of a 4D slice header, if so + // break + if(this->is_header_4d(buffer_str)) + { found_4d_header = true ; + break ; + } + // check whether it is the beginning of a slice + // 1st line in file should be + if(this->is_header_3d(buffer_str)) + { // check that slice have a constant number of rows + if(dim[2] == 1) + { col_len = col_len_cur ; + // dim[0] = row_len ; + // dim[1] = col_len ; + } + else if(col_len_cur != col_len) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + dim[2]++ ; + col_len_cur = 0 ; + n_line++ ; + continue ; + } + // 1st line in file should be a header and entering + // this block is forbidden + if(n_line == 0) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! first line is not a slice header in %s", file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + + // parse line + row_len_cur = 0 ; + buffer_vec.clear() ; + std::istringstream buffer_ss(buffer_str) ; + while(buffer_ss >> buffer_T) + { buffer_vec.push_back(buffer_T) ; + row_len_cur++ ; + } + // check for an error which likely indicates that a value could not be + // casted into a type T (mixed data types in the file) + if(buffer_ss.fail() and not buffer_ss.eof()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! could not read a line in %s (incompatible data types)", file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + + // check that number of column is constant + if(n_line_data == 0) + { row_len = row_len_cur ; } + else if(row_len_cur != row_len) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + + // update matrix content + for(auto i : buffer_vec) + { data.push_back(i) ; } + col_len_cur++ ; + n_line_data++ ; + n_line++ ; + // update dimension + dim[0] = row_len_cur ; + dim[1] = col_len_cur ; + } + + // check dimensions of last slice + if(col_len_cur != dim[1]) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions 333 in %s", file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + + return found_4d_header ; +} + + +#endif // MATRIX4D_HPP diff --git a/src/Matrix_old/Matrix.hpp b/src/Matrix_old/Matrix.hpp new file mode 100755 index 0000000..7568412 --- /dev/null +++ b/src/Matrix_old/Matrix.hpp @@ -0,0 +1,506 @@ +#ifndef MATRIX_HPP +#define MATRIX_HPP + + +#include +#include // accumulate() +#include +#include // out_of_range + + + +/*! + * \brief The Matrix class is a generic class to store data in a matrix. + * The matrix dimensionality can be any value : 1 is a vector, 2 is a regular + * 2D matrix, 3 is a 3D matrix, etc. + * + * In order to store the data properly and to perform all operations smoothly, the + * internal representation format differs from the "usual format". That is : the user + * provides coordinates as (x,y,z,...) where x referes to the row number, y to + * the column number, z the the z slice, etc. + * Internally however, x corresponds to the column number and y to the row number. + * Every other dimension has the same meaning. + * + * Internal representation : + * + * Here is an example of a 2x3 matrix (2D) + * + * {0,1,2,3,4,5} vector is turned to + * X + * ----------> + * 0 1 2 | + * 3 4 5 | Y + * \|/ + * + * dimensions are stored as {nx, ny} which corresponds to {ncol, nrow}. Coordinates + * are given using the universal format coord=(x,y) which are interpreted as {row, col}. + * Thus a simple swap(coord[0],coord[1]) should be performed to ensurethat the user given + * coordinates can be used in this referencial. + * + * + * Here is an example of a 2x3x2x2 matrix(4D) + * {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23} is turned to + * + * X + * -----------> | | + * 0 1 2 | | | + * 3 4 5 | Y | | + * \|/ | Z | + * 6 7 8 | | | + * 9 10 11 | Y | | + * \|/ \|/ | + * | A + * 12 13 14 | | | + * 15 16 17 | Y | | + * \|/ | Z | + * 18 19 20 | | | + * 21 22 23 | Y | | + * \|/ \|/ \|/ + * + * dimensions are stored as {nx, ny, nz, na} which corredponds to {ncol, nrow, nz, na}. + * Coordinates are given using the universal format coord=(x,y,z,a) which are interpreted + * as {row, col, z, a}. Thus a simple swap(coord[0],coord[1]) should be performed to ensure + * that the user given coordinates can be used in this referencial. + * + */ + +template +class Matrix +{ + public: + // constructors + Matrix() = default ; + + /*! + * \brief Constructs an matrix with the given dimension with + * 0 values. + * \param dim the dimensions. + */ + Matrix(const std::vector& dim) ; + + /*! + * \brief Constructs a matrix with the given dimensions and + * initialize the values to the given value. + * \param dim the dimensions. + * \param value the value to initialize the matrix content + * with. + */ + Matrix(const std::vector& dim, T value) ; + + /*! + * \brief Copy constructor. + * \param other the matrix to copy. + */ + Matrix (const Matrix& other) ; + + // methods + /*! + * \brief Gets the element at the given offset. + * \param offset the offset of the element to get. + * \throw std::out_of_range exception if the offset + * is out of range. + * \return the element. + */ + T get(size_t offset) const /* throw(std::out_of_range) */ ; + + /*! + * \brief Gets the element at the given coordinates. + * \param coord the coordinates of the element to get. + * \throw std::out_of_range exception if the coordinates + * are out of range. + * \return the element. + */ + T get(const std::vector& coord) const /* throw(std::out_of_range) */ ; + + /*! + * \brief Sets the element at the given offset + * to the given value. + * \param offset the offset of the element to set. + * \param value the new value. + * \throw std::out_of_range exception if the offset + * is out of range. + */ + void set(size_t offset, T value) /*throw(std::out_of_range) */ ; + /*! + * \brief Sets the element at the given coordinates + * to the given value. + * \param coord the coordinates of the element to set. + * \param value the new value. + * \throw std::out_of_range exception if the coordinates + * are out of range. + */ + void set(const std::vector& coord, T value) /* throw(std::out_of_range) */ ; + + /*! + * \brief Gets the matrix dimensions. + * \return the dimensions. + */ + std::vector get_dim() const ; + + /*! + * \brief Gets the data contained in the + * matrix as a vector. + * \return a a vector containing the data. + */ + std::vector get_data() ; + + /*! + * \brief Gets the number of dimensions (the length + * of the dimension vector). + * \return the number of dimensions + */ + size_t get_dim_size() const ; + + /*! + * \brief Gets the number of elements contained in the + * matrix. + * \return the number of element contained in the + * matrix. + */ + size_t get_data_size() const ; + + // operator + /*! + * \brief Assignment operator. + * \param other an other matrix to copy the values from. + * \return a reference to the current instance. + */ + Matrix& operator = (const Matrix& other) ; + + /*! + * \brief Comparison operator, returns true if + * both matrices are identical, that is do not + * have the same data and dimensions. + * \param other an other matrix. + * \return true if both matrices have the same + * data and dimensions. + */ + bool operator == (const Matrix& other) const ; + + /*! + * \brief Comparison operator, returns true if + * both matrices are different, that is do not + * have the same data and dimensions. + * \param other an other matrix. + * \return true if both matrices are different. + */ + bool operator != (const Matrix& other) const ; + + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param coord coord the coordinates of the element to get. + * \return a reference to this element. + */ + T& operator () (const std::vector& coord) ; + + /*! + * \brief Returns a const reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param coord coord the coordinates of the element to get. + * \return a const reference to this element. + */ + const T& operator () (const std::vector& coord) const ; + + protected: + // methods + /*! + * \brief Computes the partial dimension products and fills + * this->dim_prod according to the current values of + * this->_dim and this->dim_size. These values are used to + * access the elements given a set of coordinates. + */ + void compute_dim_product() ; + + /*! + * \brief Given a vector of at least 2 elements corresponding + * to coordinates, it simply swaps the elements at index 0 (row + * number) and 1 (column number) to make them fit the x,y,... + * matrix reprensetation (x:number of columns, y:number of rows). + * \param coord a vector of coordinates (row, column, ...). + * \return a vector of coordinates corresponding to (x,y,...). + */ + std::vector swap_coord(const std::vector& coord) const ; + + /*! + * \brief Complementary function of convert_coord(). Given + * a vector of coordinates in (x,y,...) format, it turns it + * into (row,col,...) format. + * \param coord a vector of coordinates (x,y, ...). + * \return a vector of coordinates corresponding to (row,col,...). + */ + std::vector convert_coord_back(const std::vector& coord) const ; + + /*! + * \brief Checks whether a given offset is a valid offset or + * whether it is out of range. + * \param offset the offset to check. + * \return whether the offset is valid. + */ + bool is_valid(size_t offset) const ; + + /*! + * \brief Checks whether coordinates in (x,y,...) format are + * valid or whether they are out of range. + * \param offset the offset to check. + * \return whether the offset is valid. + */ + bool is_valid(const std::vector& coord) const ; + + /*! + * \brief Converts a vector of VALID (x,y,...) coordinates to a + * the corresponding offset allowing to get an element in the + * data vector. + * If the coordinate vector has a (row, column, ...) format, the + * result will be wrong. + * \param coord a vector of coordinates with (x,y,...) format. + * \return the corresponding offset. + */ + size_t convert_to_offset(const std::vector& coord) const ; + + /*! + * \brief Complementary function of convert_to_offset(). Given an + * offset, this function returns the corresponding coordinate + * vector in (x,y,...) format. + * \param offset a given offset. + * \return the corresponding vector of (x,y,..) coordinates. + */ + std::vector convert_to_coord(size_t offset) const ; + + // fields + /*! + * \brief The dimensions values. + */ + std::vector _dim ; + /*! + * \brief Stores the data. + */ + std::vector _data ; + /*! + * \brief The number of dimensions. + */ + size_t _dim_size ; + /*! + * \brief The number of data elements stored. + */ + size_t _data_size ; + + /*! + * \brief Contains the partial product of the dimensions. That is, + * the ith element contains the product of all the i-1 precedent + * dimensions : + * element 0 : 1, element 1 : x, element 2 : x*y, element 3 : x*y*z, + * and so one. + * This is used for coordinates to offset and offset to coordinates + * conversions. + */ + std::vector _dim_prod ; +} ; + + +/*! + * \brief Sends a representation of the matrix to the stream. + * \param stream the stream of interest. + * \param m the matrix of interest. + * \return a reference to the stream. + */ +template +std::ostream& operator << (std::ostream& stream, const Matrix& m) +{ for(size_t i=0; i +Matrix::Matrix(const std::vector& dim) + : Matrix(dim, 0) +{} + +template +Matrix::Matrix(const std::vector& dim, T value) +{ this->_dim_size = dim.size() ; + this->_dim = this->swap_coord(dim) ; + this->_data_size = std::accumulate(dim.begin(), dim.end(), 1, std::multiplies()) ; + this->_data = std::vector(this->_data_size, value) ; + + this->compute_dim_product() ; +} + +template +Matrix::Matrix(const Matrix &other) +{ *this = other ; } + + +template +T Matrix::get(size_t offset) const /* throw(std::out_of_range) */ +{ if(not this->is_valid(offset)) + { throw std::out_of_range("offset is out of range!") ; } + return this->_data[offset] ; +} + +template +T Matrix::get(const std::vector& coord) const /*throw(std::out_of_range) */ +{ std::vector coord_new = this->swap_coord(coord) ; + if(not this->is_valid(coord_new)) + { throw std::out_of_range("coordinates are out of range!") ; } + return this->_data[this->convert_to_offset(coord_new)] ; +} + + +template +void Matrix::set(size_t offset, T value) /* throw(std::out_of_range) */ +{ if(not this->is_valid(offset)) + { throw std::out_of_range("offset is out of range!") ; } + this->_data[offset] = value ; +} + +template +void Matrix::set(const std::vector& coord, T value) /* throw(std::out_of_range) */ +{ std::vector coord_new = this->swap_coord(coord) ; + if(not this->is_valid(coord_new)) + { throw std::out_of_range("coordinates are out of range!") ; } + this->_data[this->convert_to_offset(coord_new)] = value ; +} + + +template +std::vector Matrix::get_dim() const +{ return this->swap_coord(this->_dim) ; } + +template +std::vector Matrix::get_data() +{ return this->_data ; } + +template +size_t Matrix::get_dim_size() const +{ return this->_dim_size ; } + +template +size_t Matrix::get_data_size() const +{ return this->_data_size ; } + + +template +Matrix& Matrix::operator = (const Matrix& other) +{ this->_dim = other._dim ; + this->_dim_size = other._dim_size ; + this->_data = other._data ; + this->_data_size = other._data_size ; + this->_dim_prod = other._dim_prod ; + return *this ; +} + +template +bool Matrix::operator == (const Matrix& other) const +{ if(&other == this) + { return true ; } + // check dim + if(this->_dim_size != other._dim_size) + { return false ; } + for(size_t i=0; i_dim_size; i++) + { if(this->_dim[i] != other._dim[i]) + { return false ; } + } + // check data + if(this->_data_size != other._data_size) + { return false ; } + for(size_t i=0; i_data_size; i++) + { if(this->_data[i] != other._data[i]) + { return false ; } + } + return true ; +} + +template +bool Matrix::operator !=(const Matrix& other) const +{ return not ((*this) == other) ;} + +template +T& Matrix::operator () (const std::vector& coord) +{ std::vector coord_new = this->swap_coord(coord) ; + return this->_data[this->convert_to_offset(coord_new)] ; +} + +template +const T& Matrix::operator () (const std::vector& coord) const +{ std::vector coord_new = this->swap_coord(coord) ; + return this->_data[this->convert_to_offset(coord_new)] ; +} + + +template +void Matrix::compute_dim_product() +{ this->_dim_prod = std::vector(this->_dim_size, 0) ; + this->_dim_prod[0] = 1 ; + if(this->_dim_size > 1) + { this->_dim_prod[1] = this->_dim[0] ; } + if(this->_dim_size > 2) + { for(size_t i=2; i_dim_size; i++) + { this->_dim_prod[i] = this->_dim_prod[i-1]*this->_dim[i-1] ; } + } +} + + +template +std::vector Matrix::swap_coord(const std::vector &coord) const +{ std::vector coord_new = coord ; + // reformat coord = (row,col,...) = (y,y,...) into coord = (col,row,...) = (x,y,...) + if(this->_dim_size > 1) + { std::swap(coord_new[0], coord_new[1]) ; } + return coord_new ; +} + + +template +bool Matrix::is_valid(size_t offset) const +{ if(offset > this->_data_size-1) + { return false ; } + return true ; +} + +template +bool Matrix::is_valid(const std::vector& coord) const +{ if(coord.size() != this->_dim_size) + { return false ; } + for(size_t i=0; i this->_dim[i]) + { return false ; } + } + return true ; +} + + + +template +size_t Matrix::convert_to_offset(const std::vector& coord) const +{ size_t offset = 0 ; + + for(size_t i=0; i_dim_size; i++) + { offset += coord[i] * this->_dim_prod[i] ; } + + return offset ; +} + + +template +std::vector Matrix::convert_to_coord(size_t offset) const +{ + std::vector coord(this->_dim_size, 0) ; + + for(int i=this->_dim_size-1; i>=0; i--) + { size_t c = offset / this->_dim_prod[i] ; + coord[i] = c ; + offset -= (this->_dim_prod[i]*c) ; + } + + return coord ; +} + + + + +#endif // MATRIX_HPP diff --git a/src/Matrix_old/Matrix2D.hpp b/src/Matrix_old/Matrix2D.hpp new file mode 100755 index 0000000..2a18813 --- /dev/null +++ b/src/Matrix_old/Matrix2D.hpp @@ -0,0 +1,414 @@ +#ifndef MATRIX2D_HPP +#define MATRIX2D_HPP + +#include "Matrix.hpp" + +#include +#include +#include +#include +#include +#include + +#define BUFFER_SIZE 4096 +// const size_t BUFFER_SIZE = 4096 ; + +/*! The Matrix2D class is a specialisation of the Matrix + * class to make work with 2D matrices easier. + * + * A text format is defined to store such matrices. + * In this format, each row is written on a single line + * and the values should separated by any blank character + * (tab, space, multiple spaces, ...). Empty lines are + * not allowed. + * + * ---- start ---- + * 1 2 3 + * 4 5 6 + * 7 8 9 + * ----- end ----- + * + */ +template +class Matrix2D : public Matrix +{ + public: + // constructors + Matrix2D() = default ; + /*! + * \brief Constructs a matrix with the given dimensions, + * filled with 0 values. + * \param nrow the number of rows. + * \param ncol the number of columns. + */ + Matrix2D(size_t nrow, size_t ncol) ; + + /*! + * \brief Constructs a matrix with the given dimensions and + * initialize the values to the given value. + * \param nrow the number of rows. + * \param ncol the number of columns. + * \param value the value to initialize the matrix content + * with. + */ + Matrix2D(size_t nrow, size_t ncol, T value) ; + + /*! + * \brief Copy constructor + * \param other the matrix to copy the content from. + */ + Matrix2D(const Matrix2D& other) ; + + /*! + * \brief Constructs a matrix from a text file. + * \param file_address the address of the file containing the matrix. + * \throw std::runtime_error if anything happen while reading the + * file (format error, file not found, etc). + */ + Matrix2D(const std::string& file_address) /* throw (std::runtime_error) */ ; + + // methods overloaded in Matrix + using Matrix::get ; + using Matrix::set ; + + // methods + /*! + * \brief Gets the element at the given coordinates. + * \param row the row number of the element to set. + * \param col the column number of the element to set. + * \throw std::out_of_range exception if the coordinates + * are out of range. + * \return the element. + */ + T get(size_t row, size_t col) const /* throw(std::out_of_range) */ ; + + /*! + * \brief Sets the element at the given coordinates + * to the given value. + * \param row the row number of the element to set. + * \param col the column number of the element to set. + * \param value the new value. + * \throw std::out_of_range exception if the coordinates + * are out of range. + */ + void set(size_t row, size_t col, T value) /* throw (std::out_of_range) */ ; + + /*! + * \brief Gets the number of rows. + * \return the number of rows. + */ + size_t get_nrow() const ; + + /*! + * \brief Gets the number of columns. + * \return the number of columns. + */ + size_t get_ncol() const ; + + /*! + * \brief Gets the values in the i-th row. + * \param i the row of interest. + * \throw std::out_of_range if i is out of range. + * \return the values in this row. + */ + std::vector get_row(size_t i) const /* throw (std::out_of_range) */ ; + + /*! + * \brief Gets the values in the i-th column. + * \param i the column of interest. + * \throw std::out_of_range if i is out of range. + * \return the values in this column. + */ + std::vector get_col(size_t i) const /* throw (std::out_of_range) */ ; + + /*! + * \brief Sets the values of a given rows with the values of a given + * vector. + * \param i the row of interest. + * \param values the new values. + * \throw std::out_of_range if i is out of range. + * \throw std::invalid_argument if values does not have a length equal + * to the number of columns of the matrix. + */ + void set_row(size_t i, const std::vector& values) /* throw (std::out_of_range, std::invalid_argument) */ ; + + /*! + * \brief Sets the values of a given column with the values of a given + * vector. + * \param i the column of interest. + * \param values the new values. + * \throw std::out_of_range if i is out of range. + * \throw std::invalid_argument if values does not have a length equal + * to the number of rows of the matrix. + */ + void set_col(size_t i, const std::vector& values) /* throw (std::out_of_range, std::invalid_argument) */ ; + + /*! + * \brief Produces a nice representation of the matrix on the given + * stream. + * \param stream the stream. + * \param precision the rounding precision. + * \param width the column width in number of characters. + * \param sep the character separator. + */ + void print(std::ostream& stram, size_t precision=4, size_t width=6, char sep=' ') const ; + + // operators + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param row the row number of the element to set. + * \param col the column number of the element to set. + * \return a reference to this element. + */ + T& operator () (size_t row, size_t col) ; + + /*! + * \brief Returns a const reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param row the row number of the element to set. + * \param col the column number of the element to set. + * \return a const reference to this element. + */ + const T& operator () (size_t row, size_t col) const ; + +} ; + +/*! + * \brief Sends a representation of the matrix to the stream. + * \param stream the stream of interest. + * \param m the matrix of interest. + * \return a reference to the stream. + */ +template +std::ostream& operator << (std::ostream& stream, const Matrix2D& m) +{ m.print(stream) ; + return stream ; +} + + +/*! + * \brief Produces a transpose of the given matrix. + * \param m a matrix. + */ +template +Matrix2D transpose(const Matrix2D& m) ; + + + + +template +Matrix2D transpose(const Matrix2D& m) +{ std::vector dim = m.get_dim() ; + size_t nrow = dim[0] ; + size_t ncol = dim[1] ; + Matrix2D m2(ncol, nrow, 0) ; + for(size_t i=0; i +Matrix2D::Matrix2D(size_t nrow, size_t ncol) + : Matrix2D(nrow, ncol, 0) +{} + +template +Matrix2D::Matrix2D(size_t nrow, size_t ncol, T value) + : Matrix({nrow, ncol}, value) +{} + +template +Matrix2D::Matrix2D(const Matrix2D& other) + : Matrix(other) +{} + +template +Matrix2D::Matrix2D(const std::string &file_address) /* throw (std::runtime_error) */ +// : Matrix({0,0}) +{ + this->_dim = {0,0} ; + this->_data = std::vector() ; + this->_dim_size = this->_dim.size() ; + this->_data_size = this->_data.size() ; + this->_dim_prod = std::vector(this->_dim_size, 0) ; + + std::ifstream file(file_address, std::ifstream::in) ; + if(file.fail()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! cannot open %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + std::string buffer_str ; + std::vector buffer_vec ; + T buffer_T ; + + // read file + size_t i = 0 ; + size_t row_len = 0 ; + while(getline(file, buffer_str)) + { // check stream status and read content + if(file.eof()) + { break ; } + if(buffer_str.size() == 0) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! while reading %s (empty line)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + if(file.fail()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + // parse line + buffer_vec.clear() ; + std::istringstream buffer_ss(buffer_str) ; + while(buffer_ss >> buffer_T) + { buffer_vec.push_back(buffer_T) ; } + // check for an error which likely indicates that a value could not be + // casted into a type T (mixed data types in the file) + if(buffer_ss.fail() and not buffer_ss.eof()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! could not read a line in %s (incompatible data types)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + // check that number of column is constant + if(i == 0) + { row_len = buffer_vec.size() ; } + else if(buffer_vec.size() != row_len) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! variable number of columns in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + // update matrix content + for(auto i : buffer_vec) + { this->_data.push_back(i) ; + this->_data_size++ ; + } + this->_dim[1]++ ; + i++ ; + } + file.close() ; + + this->_dim[0] = row_len ; + this->compute_dim_product() ; +} + + + +template +T Matrix2D::get(size_t row, size_t col) const /* throw(std::out_of_range) */ +{ try + { return this->get({row, col}) ; } + catch(std::out_of_range& e) + { throw e ; } +} + + +template +void Matrix2D::set(size_t row, size_t col, T value) /* throw(std::out_of_range) */ +{ try + { this->set({row, col}, value) ; } + catch(std::out_of_range& e) + { throw e ; } +} + + +template +size_t Matrix2D::get_nrow() const +{ return this->_dim[1] ; } + + +template +size_t Matrix2D::get_ncol() const +{ return this->_dim[0] ; } + + +template +std::vector Matrix2D::get_row(size_t i) const /* throw (std::out_of_range) */ +{ if(i>=this->get_nrow()) + { throw std::out_of_range("row index is out of range!") ; } + + std::vector row(this->get_ncol()) ; + for(size_t j=i*this->get_ncol(), n=0; nget_ncol(); j++, n++) + { row[n] = this->_data[j] ; } + + return row ; +} + + +template +std::vector Matrix2D::get_col(size_t i) const /* throw (std::out_of_range) */ +{ if(i>=this->get_ncol()) + { throw std::out_of_range("column index is out of range!") ; } + + std::vector col(this->get_nrow()) ; + for(size_t j=i, n=0; nget_nrow(); j+=this->get_ncol(), n++) + { col[n] = this->_data[j] ; } + + return col ; +} + + +template +void Matrix2D::set_row(size_t i, const std::vector& values) /* throw (std::out_of_range, std::invalid_argument) */ +{ if(i>=this->get_nrow()) + { throw std::out_of_range("row index is out of range!") ; } + else if(values.size() != this->get_ncol()) + { throw std::invalid_argument("the given vector length is not equal to the number of columns!") ; } + + for(size_t j=i*this->get_ncol(), n=0; nget_ncol(); j++, n++) + { this->_data[j] = values[n] ; } +} + + +template +void Matrix2D::set_col(size_t i, const std::vector& values) /* throw (std::out_of_range, std::invalid_argument) */ +{ if(i>=this->get_ncol()) + { throw std::out_of_range("row index is out of range!") ; } + else if(values.size() != this->get_nrow()) + { throw std::invalid_argument("the given vector length is not equal to the number of rows!") ; } + + for(size_t n=0, j=i; nget_nrow(); n++, j+=this->get_ncol()) + { this->_data[j] = values[n] ; } +} + +template +void Matrix2D::print(std::ostream& stream, size_t precision, size_t width, char sep) const +{ stream.setf(std::ios::left) ; + + for(size_t i=0; iget_nrow(); i++) + { for(size_t j=0; jget_ncol(); j++) + { stream << std::setprecision(precision) << std::setw(width) << (*this)(i,j) << sep ; } + stream << std::endl ; + } +} + +template +T& Matrix2D::operator () (size_t row, size_t col) +{ std::vector coord = {col, row} ; + return this->_data[this->convert_to_offset(coord)] ; +} + + +template +const T& Matrix2D::operator () (size_t row, size_t col) const +{ std::vector coord = {col, row} ; + return this->_data[this->convert_to_offset(coord)] ; +} + + +#endif // MATRIX2D_HPP + + diff --git a/src/Matrix_old/Matrix3D.hpp b/src/Matrix_old/Matrix3D.hpp new file mode 100755 index 0000000..96f840b --- /dev/null +++ b/src/Matrix_old/Matrix3D.hpp @@ -0,0 +1,361 @@ +#ifndef MATRIX3D_HPP +#define MATRIX3D_HPP + +#include "Matrix.hpp" + +#include +#include +#include +#include +#include // ifstream +#include // istringstream +#include // std::runtime_error +#include // std::equal() + +#define BUFFER_SIZE 4096 +// const size_t BUFFER_SIZE = 4096 ; + +/*! + * The Matrix3D class is a specialisation of the Matrix + * class to make work with 3D matrices more easily. + * + * A text file format is defined to store such matrices. The specifications are as + * follows : + * Absolutely NO empty lines are allowed! + * The following lines should contain : + * + * 1st line : a slice header, ',,0' indicates that a slice of the 3rd dimension + * is beginning (this is a z slice). + * 2nd - Nth line : the firt slice, as a 2D matrix. In the example below, it has + * dimensions 3x4. + * N+1th line : a slice header, ',,1' indicates that the 2nd slice is beginning. + * N+1th - ... : the second slice + * and so on... + * + * Example of a 3x4x2 3D matrix + * ---- start ---- + * ,,0 + * 1 2 3 4 + * 5 6 7 8 + * 8 9 10 11 + *,,1 + * 12 13 14 15 + * 16 17 18 19 + * 20 21 22 23 + * ----- end ----- + * + */ +template +class Matrix3D : public Matrix +{ + public: + // constructors + Matrix3D() = default ; + + /*! + * \brief Constructs a matrix with the given dimensions, + * filled with 0 values. + * \param dim1 the first dimension. + * \param dim2 the second dimension. + * \param dim3 the third dimension. + */ + Matrix3D(size_t dim1, size_t dim2, size_t dim3) ; + + /*! + * \brief Constructs a matrix with the given dimensions and + * initialize the values to the given value. + * \param dim1 the first dimension. + * \param dim2 the second dimension. + * \param dim3 the third dimension. + * \param value the value to initialize the matrix content + * with. + */ + Matrix3D(size_t dim1, size_t dim2, size_t dim3, T value) ; + + /*! + * \brief Copy constructor + * \param other the matrix to copy the content from. + */ + Matrix3D(const Matrix3D& other) ; + + /*! + * \brief Constructs a matrix from a text file. + * \param file_address the address of the file containing the matrix. + * \throw std::runtime_error if anything happen while reading the + * file (format error, file not found, etc). + */ + Matrix3D(const std::string& file_address) /* throw (std::runtime_error) */ ; + + // methods overloaded from Matrix + using Matrix::get ; + using Matrix::set ; + + // methods + /*! + * \brief Gets the element at the given coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \throw std::out_of_range exception if the coordinates + * are out of range. + * \return the element. + */ + T get(size_t dim1, size_t dim2, size_t dim3) const /* throw (std::out_of_range) */ ; + /*! + * \brief Sets the element at the given coordinates + * to the given value. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param value the new value. + * \throw std::out_of_range exception if the coordinates + * are out of range. + */ + void set(size_t dim1, size_t dim2, size_t dim3, T value) /* throw (std::out_of_range) */ ; + + /*! + * \brief Produces a nice representation of the matrix on the given + * stream. + * \param stream the stream. + * \param precision the rounding precision. + * \param width the column width in number of characters. + * \param sep the character separator. + */ + void print(std::ostream& stream, size_t precision=4 ,size_t width=6, char sep=' ') const ; + + // operators + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \return a reference to this element. + */ + T& operator() (size_t dim1, size_t dim2, size_t dim3) ; + /*! + * \brief Returns a constant reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \return a constant reference to this element. + */ + const T& operator() (size_t dim1, size_t dim2, size_t dim3) const ; + + private: + // methods + /*! + * \brief Checks whether a given string is a slice header + * (such as ",,0"), as found in files storing Matrix3D. + * \param str the string to check. + * \return whether the string is a slice header. + */ + bool is_header(const std::string& str) const ; + +} ; + +/*! + * \brief Sends a representation of the matrix to the stream. + * \param stream the stream of interest. + * \param m the matrix of interest. + * \return a reference to the stream. + */ +template +std::ostream& operator << (std::ostream& stream, const Matrix3D& m) +{ m.print(stream) ; + return stream ; +} + +template +Matrix3D::Matrix3D(size_t dim1, size_t dim2, size_t dim3) + : Matrix3D(dim1, dim2, dim3, 0) +{} + +template +Matrix3D::Matrix3D(size_t dim1, size_t dim2, size_t dim3, T value) + : Matrix({dim1, dim2, dim3}, value) +{} + +template +Matrix3D::Matrix3D(const Matrix3D &other) + : Matrix(other) +{} + + +template +Matrix3D::Matrix3D(const std::string &file_address) /* throw (std::runtime_error) */ +{ + this->_dim = {0,0,0} ; + this->_data = std::vector() ; + this->_dim_size = this->_dim.size() ; + this->_data_size = this->_data.size() ; + this->_dim_prod = std::vector(this->_dim_size, 0) ; + + std::ifstream file(file_address, std::ifstream::in) ; + if(file.fail()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! cannot open %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + std::string buffer_str ; + std::vector buffer_vec ; + T buffer_T ; + + // read file + size_t n_line = 0, n_line_data = 0 ; // number of line and of data line read + size_t row_len = 0, col_len = 0 ; // length of row and column in nber of values + size_t row_len_cur = 0, col_len_cur = 0 ; // current number of values read in row and col + + while(getline(file, buffer_str)) + { // check stream status and read content + if(buffer_str.size() == 0) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s (empty line)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + if(file.fail()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + // check whether it is the beginning of a slice + // 1st line in file should be + if(this->is_header(buffer_str)) + { // check that slice have a constant number of rows + if(this->_dim[2] == 1) + { col_len = col_len_cur ; + this->_dim[0] = row_len ; + this->_dim[1] = col_len ; + } + else if(col_len_cur != col_len) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + this->_dim[2]++ ; + col_len_cur = 0 ; + n_line++ ; + continue ; + } + // 1st line in file should be a header and entering + // this block is forbidden + if(n_line == 0) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! first line is not a slice header in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + // parse line + row_len_cur = 0 ; + buffer_vec.clear() ; + std::istringstream buffer_ss(buffer_str) ; + while(buffer_ss >> buffer_T) + { buffer_vec.push_back(buffer_T) ; + row_len_cur++ ; + } + // check for an error which likely indicates that a value could not be + // casted into a type T (mixed data types in the file) + if(buffer_ss.fail() and not buffer_ss.eof()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! could not read a line in %s (incompatible data types)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + // check that number of column is constant + if(n_line_data == 0) + { row_len = row_len_cur ; } + else if(row_len_cur != row_len) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + // update matrix content + for(auto i : buffer_vec) + { this->_data.push_back(i) ; + this->_data_size++ ; + } + col_len_cur++ ; + n_line_data++ ; + n_line++ ; + } + // check dimensions of last slice + if(col_len_cur != col_len) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + file.close() ; + this->compute_dim_product() ; +} + + +template +T Matrix3D::get(size_t dim1, size_t dim2, size_t dim3) const /* throw(std::out_of_range) */ +{ try + { return this->get({dim1, dim2, dim3}) ; } + catch(std::out_of_range& e) + { throw e ; } +} + +template +void Matrix3D::set(size_t dim1, size_t dim2, size_t dim3, T value) /* throw(std::out_of_range) */ +{ try + { return this->set({dim1, dim2, dim3}, value) ; } + catch(std::out_of_range& e) + { throw e ; } +} + + +template +T& Matrix3D::operator () (size_t dim1, size_t dim2, size_t dim3) +{ std::vector coord = {dim2, dim1, dim3} ; + return this->_data[this->convert_to_offset(coord)] ; +} + + +template +void Matrix3D::print(std::ostream& stream, size_t precision, size_t width, char sep) const +{ + stream.setf(std::ios::left) ; + std::vector dim = this->get_dim() ; + for(size_t z=0; z +const T& Matrix3D::operator () (size_t dim1, size_t dim2, size_t dim3) const +{ std::vector coord = {dim2, dim1, dim3} ; + return this->_data[this->convert_to_offset(coord)] ; +} + + +template +bool Matrix3D::is_header(const std::string& str) const +{ if(str[0] == ',' and + str[1] == ',' and + str.find(',', 2) == std::string::npos) + { return true ; } + return false ; +} + +#endif // MATRIX3D_HPP diff --git a/src/Matrix_old/Matrix4D.hpp b/src/Matrix_old/Matrix4D.hpp new file mode 100755 index 0000000..ee699bd --- /dev/null +++ b/src/Matrix_old/Matrix4D.hpp @@ -0,0 +1,539 @@ +#ifndef MATRIX4D_HPP +#define MATRIX4D_HPP + +#include "Matrix.hpp" + +#include +#include +#include // std::out_of_range +#include +#include +#include // ifstream +#include // sstream + + +#define BUFFER_SIZE 4096 +// const size_t BUFFER_SIZE = 4096 ; + +/*! + * The Matrix4D class is a specialisation of the Matrix + * class to handle 4D matrices more easily. + * + * A text file format is defined to store such matrices. + * The specifications are as follows : + * + * Absolutely NO empty lines are allowed! + * The following lines should contain : + * + * 1st line : a slice header ',,,0' indicating that a + * slice of the 4th dimension is beginning. + * 3nd - Nth line : the slice of the 4th dimension. It contains + * slice in the 3rd dimension which are 2D + * matrices separated by headers (',,0' and + * ',,1' in the below example). + * N+1th line : ',,,1' indicating that the 2nd slice of the + * 4th dimension is beginning. + * and so on... + * + * Example + * ---- start ---- + * ,,,0 + * ,,0 + * 1 2 3 + * 4 5 6 + * ,,1 + * 7 8 9 + * 10 11 12 + * ,,,1 + * ,,0 + * 21 22 23 + * 24 25 26 + * ,,1 + * 27 28 29 + * 30 31 32 + * ----- end ----- + * + */ +template +class Matrix4D : public Matrix +{ + public: + // constructors + Matrix4D() = default ; + /*! + * \brief Constructs a matrix with the given dimensions, + * filled with 0 values. + * \param dim1 the first dimension. + * \param dim2 the second dimension. + * \param dim3 the third dimension. + * \param dim4 the fourth dimension. + */ + Matrix4D(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) ; + /*! + * \brief Constructs a matrix with the given dimensions and + * initialize the values to the given value. + * \param dim1 the first dimension. + * \param dim2 the second dimension. + * \param dim3 the third dimension. + * \param dim4 the fourth dimension. + * \param value the value to initialize the matrix content + * with. + */ + Matrix4D(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4, + T value) ; + /*! + * \brief Copy constructor + * \param other the matrix to copy the content from. + */ + Matrix4D(const Matrix4D& other) ; + /*! + * \brief Constructs a matrix from a text file. + * \param file_address the address of the file containing the matrix. + * \throw std::runtime_error if anything happen while reading the + * file (format error, file not found, etc). + */ + Matrix4D(const std::string& file_address) /* throw (std::runtime_error) */ ; + + // methods overloaded from Matrix + using Matrix::get ; + using Matrix::set ; + + // methods OK + /*! + * \brief Gets the element at the given coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param dim4 the fourth dimension coordinate. + * \throw std::out_of_range exception if the coordinates + * are out of range. + * \return the element. + */ + T get(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) const /* throw (std::out_of_range) */ ; + /*! + * \brief Sets the element at the given coordinates + * to the given value. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param dim4 the fourth dimension coordinate. + * \param value the new value. + * \throw std::out_of_range exception if the coordinates + * are out of range. + */ + void set(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4, + T value) /* throw (std::out_of_range) */ ; + /*! + * \brief Produces a nice representation of the matrix on the given + * stream. + * \param stream the stream. + * \param precision the rounding precision. + * \param width the column width in number of characters. + * \param sep the character separator. + */ + void print(std::ostream& stream, + size_t precision=4, + size_t width=6, + char sep=' ') const ; + + // operators OK + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param dim4 the third dimension coordinate. + * \return a reference to this element. + */ + T& operator() (size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) ; + /*! + * \brief Returns a reference to the corrresponding + * element. This method does not perform any check on + * the coordinates. + * \param dim1 the first dimension coordinate. + * \param dim2 the second dimension coordinate. + * \param dim3 the third dimension coordinate. + * \param dim4 the third dimension coordinate. + * \return a reference to this element. + */ + const T& operator() (size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) const ; + + private: + // methods + /*! + * \brief Checks whether a given string is a 3D header + * (such as ",,0"), as found in files storing Matrix4D. + * \param str the string to check. + * \return whether the string is such a slice header. + */ + bool is_header_3d(const std::string& str) const ; + /*! + * \brief Checks whether a given string is a 4D header + * (such as ",,,0"), as found in files storing Matrix4D. + * \param str the string to check. + * \return whether the string is such a slice header. + */ + bool is_header_4d(const std::string& str) const ; + + /*! + * \brief Routine to load 4D matrices from files. + * This method reads from a std::ifstream object, + * from the current pointer location until i) a 4D + * header line is found (such as ',,,1') or ii) until + * it cannot read anymore from the stream. All + * data are pushed back into the data vector and + * the dimensions of the data read are stored into + * the dim vector (these data are actually a 3D + * matrix). If the method returned because it + * found another 4D header, it returns true, false + * otherwise. + * To read an entire 4D matrix from a file, simply + * use this scheme : i) read the 1st 4D header + * ii) call this function while it returns true. + * \param file_name a reference to a string containing + * the address of the file currently read (for exception + * messages). + * \param file a reference to the std::ifstream to read + * from. Obviously, the stream state will be modified as + * the method reads from it. However, it will never be + * closed by the method. + * \param data a reference to an empty vector where the + * read data will be pushed back. + * \param dim a reference to an empty vector where the + * dimensions of the read data will be stored. + * \return whether the last piece of data read from the + * stream was a 4D header. + */ + bool get_3d_slice(const std::string& file_name, + std::ifstream& file, + std::vector& data, + std::vector& dim) const + /* throw (std::runtime_error) */ ; + +} ; + +template +std::ostream& operator << (std::ostream& stream, const Matrix4D& m) +{ m.print(stream) ; + return stream ; +} + +template +Matrix4D::Matrix4D(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) + : Matrix({dim1, dim2, dim3, dim4}, 0) +{} + +template +Matrix4D::Matrix4D(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4, + T value) + : Matrix({dim1, dim2, dim3, dim4}, value) +{} + +template +Matrix4D::Matrix4D(const Matrix4D &other) + : Matrix(other) +{} + +template +Matrix4D::Matrix4D(const std::string &file_address) /* throw (std::runtime_error) */ +{ this->_dim = {0,0,0,0} ; + this->_data = std::vector() ; + this->_dim_size = this->_dim.size() ; + this->_data_size = this->_data.size() ; + this->_dim_prod = std::vector(this->_dim_size, 0) ; + + std::ifstream file(file_address, std::ifstream::in) ; + if(file.fail()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! cannot open %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + + std::string buffer_str ; + std::vector buffer_t ; + std::vector dim ; + + getline(file, buffer_str) ; + bool found_4d_header = this->is_header_4d(buffer_str) ; + do + { // check stream status and read content + if(buffer_str.size() == 0) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s (empty line)", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + if(file.fail()) + { file.close() ; + char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + // this is the beginning of a 3D slice -> get it using routine + if(found_4d_header) + { try + { // get slice + buffer_t.clear() ; + dim.clear() ; + found_4d_header = this->get_3d_slice(file_address, file, buffer_t, dim); + // update data + for(const auto& i : buffer_t) + { this->_data.push_back(i) ; + this->_data_size++ ; + } + // update dim only for the 1st slice (the 1st slice set the dimensions) + if(this->_dim[3] == 0) + { this->_dim[0] = dim[0] ; + this->_dim[1] = dim[1] ; + this->_dim[2] = dim[2] ; + } + // check dimensions of the slice + else + { if(dim[0] != this->_dim[0] or + dim[1] != this->_dim[1] or + dim[2] != this->_dim[2]) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", + file_address.c_str()) ; + throw std::runtime_error(msg) ; + } + } + this->_dim[3]++ ; + } + catch(std::runtime_error& e) + { file.close() ; + throw e ; + } + } + } while(found_4d_header) ; + + file.close() ; + this->compute_dim_product() ; +} + +template +T Matrix4D::get(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) const /* throw (std::out_of_range) */ +{ try + { return this->get({dim1, dim2, dim3, dim4}) ; } + catch(std::out_of_range& e) + { throw e ; } +} + +template +void Matrix4D::set(size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4, + T value) /* throw (std::out_of_range) */ +{ try + { this->set({dim1, dim2, dim3, dim4}, value) ; } + catch(std::out_of_range& e) + { throw e ; } +} + +template +void Matrix4D::print(std::ostream &stream, + size_t precision, + size_t width, + char sep) const +{ stream.setf(std::ios::left) ; + std::vector dim = this->get_dim() ; + + for(size_t dim4=0; dim4 +T& Matrix4D::operator () (size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) +{ std::vector coord = {dim2, dim1, dim3, dim4} ; + return this->_data[this->convert_to_offset(coord)] ; +} + +template +const T& Matrix4D::operator () (size_t dim1, + size_t dim2, + size_t dim3, + size_t dim4) const +{ std::vector coord = {dim2, dim1, dim3, dim4} ; + return this->_data[this->convert_to_offset(coord)] ; +} + +template +bool Matrix4D::is_header_3d(const std::string &str) const +{ if(str[0] == ',' and + str[1] == ',' and + str.find(',', 2) == std::string::npos) + { return true ; } + return false ; +} + +template +bool Matrix4D::is_header_4d(const std::string &str) const +{ if(str[0] == ',' and + str[1] == ',' and + str[2] == ',' and + str.find(',', 3) == std::string::npos) + { return true ; } + return false ; +} + +template +bool Matrix4D::get_3d_slice(const std::string& file_name, + std::ifstream& file, + std::vector &data, + std::vector &dim) const /* throw (std::runtime_error) */ +{ + bool found_4d_header = false ; // the flag to return + + dim = {0,0,0} ; + + std::string buffer_str ; + std::vector buffer_vec ; + T buffer_T ; + + size_t n_line = 0, n_line_data = 0 ; // number of line and of data line read + size_t row_len = 0, col_len = 0 ; // length of row and column in nber of values + size_t row_len_cur = 0, col_len_cur = 0 ; // current number of values read in row and col + + while(getline(file, buffer_str)) + { // std::cerr << "in " << n_line << " : " << buffer_str << std::endl ; + // check stream status and read content + if(buffer_str.size() == 0) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s (empty line)", file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + if(file.fail()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "error! while reading %s", file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + // check whether this is the beginning of a 4D slice header, if so + // break ; + if(this->is_header_4d(buffer_str)) + { found_4d_header = true ; + break ; + } + // check whether it is the beginning of a slice + // 1st line in file should be + if(this->is_header_3d(buffer_str)) + { // check that slice have a constant number of rows + if(dim[2] == 1) + { col_len = col_len_cur ; + dim[0] = row_len ; + dim[1] = col_len ; + } + else if(col_len_cur != col_len) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", + file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + dim[2]++ ; + col_len_cur = 0 ; + n_line++ ; + continue ; + } + // 1st line in file should be a header and entering + // this block is forbidden + if(n_line == 0) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! first line is not a slice header in %s", + file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + + // parse line + row_len_cur = 0 ; + buffer_vec.clear() ; + std::istringstream buffer_ss(buffer_str) ; + while(buffer_ss >> buffer_T) + { buffer_vec.push_back(buffer_T) ; + row_len_cur++ ; + } + // check for an error which likely indicates that a value could not be + // casted into a type T (mixed data types in the file) + if(buffer_ss.fail() and not buffer_ss.eof()) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! could not read a line in %s (incompatible data types)", + file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + + // check that number of column is constant + if(n_line_data == 0) + { row_len = row_len_cur ; } + else if(row_len_cur != row_len) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", + file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + + // update matrix content + for(auto i : buffer_vec) + { data.push_back(i) ; } + col_len_cur++ ; + n_line_data++ ; + n_line++ ; + } + + // check dimensions of last slice + if(col_len_cur != col_len) + { char msg[BUFFER_SIZE] ; + sprintf(msg, "format error! slice have variable dimensions in %s", + file_name.c_str()) ; + throw std::runtime_error(msg) ; + } + + return found_4d_header ; +} + + +#endif // MATRIX4D_HPP diff --git a/src/Parallel/ThreadPool.cpp b/src/Parallel/ThreadPool.cpp new file mode 100755 index 0000000..7d03b24 --- /dev/null +++ b/src/Parallel/ThreadPool.cpp @@ -0,0 +1,144 @@ +#include "ThreadPool.hpp" +#include +#include +#include //std::pair + + +std::vector> ThreadPool::split_range(size_t from, + size_t to, + size_t n_thread) +{ // contains the [from, to) slices + std::vector> coordinate_list(n_thread) ; + + size_t by = to / n_thread ; + size_t from_current = from, to_current = 0 ; + + for(size_t n=0; n to) ? (to_current = to) : 0 ; + coordinate_list[n] = std::pair(from_current, to_current) ; + } + return coordinate_list ; +} + +ThreadPool::ThreadPool(size_t n_threads, bool debug) + : queue_task(), + queue_mutex(), + queue_open(true), + debug(debug) +{ assert(n_threads > 0) ; + this->threads = std::vector(n_threads) ; + for(size_t i=0; ithreads[i] = std::thread(&ThreadPool::thread_routine, this) ; } +} + + +ThreadPool::~ThreadPool() +{} + + +size_t ThreadPool::getNThread() const +{ return this->threads.size() ; } + + +void ThreadPool::addJob(std::function&& task) +{ // only add a job in the queues if they are open + if(this->isQueueOpen()) + { this->lock_mutex_queue() ; + // this->debug_print(std::string("started adding job")) ; + this->queue_task.push(task) ; + // this->debug_print(std::string("ended adding job")) ; + this->unlock_mutex_queue() ; + } + std::this_thread::sleep_for(std::chrono::milliseconds(5)) ; +} + + +void ThreadPool::join() +{ // closes the queues, later call to addJob() will be effect + this->close_queue() ; + + // joins the threads + for(auto& thr : this->threads) + { if(thr.joinable()) + { this->debug_print(std::string("joined a thread")) ; + thr.join() ; + } + } +} + + +void ThreadPool::thread_routine() +{ + this->debug_print(std::string("started")) ; + + while(true) + { + // get a function and the arguments value from the queue + std::function task ; + bool has_task = false ; + bool is_queue_open = this->isQueueOpen() ; + std::pair args ; + this->lock_mutex_queue() ; + bool is_queue_empty = this->queue_task.empty() ; + if(not is_queue_empty) + { // this->debug_print(std::string("fetching from queue")) ; + task = this->queue_task.front() ; + this->queue_task.pop() ; + has_task = true ; + } + this->unlock_mutex_queue() ; + + // runs the task + if(has_task) + { this->debug_print(std::string("working")) ; + task() ; + } + // exit + else if(is_queue_empty and not is_queue_open) + { break ; } + std::this_thread::sleep_for(std::chrono::milliseconds(10)) ; + } + this->debug_print(std::string("ended")) ; +} + + +void ThreadPool::lock_mutex_queue() +{ this->queue_mutex.lock() ; + // this->debug_print(std::string("locked mutex")) ; +} + + +void ThreadPool::unlock_mutex_queue() +{ this->queue_mutex.unlock() ; + // this->debug_print(std::string("unlocked mutex")) ; +} + + +void ThreadPool::open_queue() +{ this->queue_open = true ; } + + +void ThreadPool::close_queue() +{ this->queue_open = false ; } + + +bool ThreadPool::isQueueOpen() +{ return this->queue_open ; } + + +bool ThreadPool::isDebugOn() const +{ return this->debug ; } + + +void ThreadPool::debug_print(const std::string& msg, std::ostream& out) const +{ if(this->isDebugOn()) + { std::hash hasher ; + char message[1024] ; + sprintf(message, "Thread %zu : %s\n", + hasher(std::this_thread::get_id()), msg.c_str()) ; + out << message ; + } +} + diff --git a/src/Parallel/ThreadPool.hpp b/src/Parallel/ThreadPool.hpp new file mode 100755 index 0000000..8392ab5 --- /dev/null +++ b/src/Parallel/ThreadPool.hpp @@ -0,0 +1,170 @@ +#ifndef THREADPOOL_HPP +#define THREADPOOL_HPP + +#include +#include +#include +#include +#include +#include +#include +#include // std::pair +#include // std::function + +typedef void (*function_ptr)(size_t, size_t) ; + + +/*! + * \brief The ThreadPool class implements a simple pool of working threads. + * At construction, threads are spawned and tries to get tasks to execute + * from a queue. Any access to the queue is synchonized through the use of + * a mutex. + * Jobs are added to the queue by calling addJob(task) where task is the + * result of std::bind(). The queue can be open - in which case the jobs are + * effectively added to the queues - or closed - in which case nothing can + * be push into the queue anymore. + * Stopping the pool is done through calling join() which will close the queue + * - and eventually let the threads empty it - and join all the threads. + * Any access to the queue is synchonized through the use of a mutex. + */ +class ThreadPool +{ + public: + /*! + * \brief Split a range [from,to) into n equal non-overlapping intervals + * [from,b), [b,c), .... [d,to) where [d,to) is the n-th slice. + * This function is usefull to compute the boundary indices in between + * which worker threads should work on a common data structure. + * \param from the lower range limit (comprised within the range). + * \param to the upper range limit (non included in the range). + * \param n the number of slices to split the range into. + * \return a vector containing pairs [from,to) for each interval. + */ + static std::vector> split_range(size_t from, + size_t to, + size_t n) ; + + public: + /*! + * \brief Default constructor. + * Constructs a thread pool containing the + * given number of threads, by default 1. + * \param n_threads the number of threads, by default 1. + * \param debug enables debugging verbosity. + */ + ThreadPool(size_t n_threads=1, bool debug=false) ; + + ThreadPool(const ThreadPool& other) = delete ; + + /*! + * \brief class destructor + */ + ~ThreadPool() ; + + /*! + * \brief gets the number of threads in the pool. + * \return the number of threads. + */ + size_t getNThread() const ; + + /*! + * \brief adds a task in the queue for the threads. + * Once join() has been called, the job queues are closed + * and calling this method remains effectless. + * \param task a function bound to its arguments using std::bind() + * WARNING : I'dont know whether this is portable :-/ + */ + void addJob(std::function&& task) ; + + /*! + * \brief closes the job queues and join the threads. + * When this methods returns, all the jobs have been + * run and each threads has been joined. + */ + void join() ; + + private: + /*! + * \brief runs an inifinite routine during which + * the caller tries to get a task to execute from + * task queue. + * The routine is interrupted when the queue is + * closed AND empty. + */ + void thread_routine() ; + + /*! + * \brief locks the queue accession mutex. + */ + void lock_mutex_queue() ; + + /*! + * \brief unlocks the queue accession mutex. + */ + void unlock_mutex_queue() ; + + /*! + * \brief opens the jobs queue. Later calls to + * addJob() have an effect. + */ + void open_queue() ; + + /*! + * \brief closes the jobs queue. Later calls to + * addJob() remain effectless. + */ + void close_queue() ; + + /*! + * \brief checks whether the queue is open. + * \return whether the job queue is open. + */ + bool isQueueOpen() ; + + /*! + * \brief checks whether the debugging verbosity is on. + * \return whether the debugging verbosity is on. + */ + bool isDebugOn() const ; + + /*! + * \brief formats and prints the given debug message + * (with the thread number) to the given stream. + * The message is effectively printed only if this->debug + * is set to true (set at construction time). + * \param msg the message to print. + */ + void debug_print(const std::string& msg, std::ostream& out=std::cerr) const ; + + // fields + /*! + * \brief the threads. + */ + std::vector threads ; + + /*! + * \brief the task queue. + */ + std::queue> queue_task ; + + /*! + * \brief the synchronization mutex to access + * the queues. + */ + std::mutex queue_mutex ; + + /*! + * \brief whether the queues are open for pushing + * or not. + */ + bool queue_open ; + + /*! + * \brief whether debugging verbosity is enabled. + */ + bool debug ; + +} ; + + +#endif // THREADPOOL_HPP diff --git a/src/Random/Random.cpp b/src/Random/Random.cpp new file mode 100755 index 0000000..201bb70 --- /dev/null +++ b/src/Random/Random.cpp @@ -0,0 +1,30 @@ +#include "Random.hpp" + +bool rand_bernoulli(double p) +{ std::bernoulli_distribution dist(p) ; + return dist(getRandomGenerator()) ; +} + + +std::vector rand_bernoulli(double p, size_t n) +{ std::vector vector(n) ; + std::bernoulli_distribution dist(p) ; + for(size_t i=0; i dist(m, sd) ; + return dist(getRandomGenerator()) ; +} + + +std::vector rand_normal(double m, double sd, double n) +{ std::vector vector(n) ; + std::normal_distribution dist(m, sd) ; + for(size_t i=0; i +#include +#include + +#include "RandomNumberGenerator.hpp" + + +/*! + * \brief Generates a random number from a + * Bernouilli distribution of parameter p. + * \param p the probability of success. + * \return a random number. + */ +bool rand_bernoulli(double p) ; + + +/*! + * \brief Generates n random number from a + * Bernouilli distribution of parameter p. + * Not faster than rand_bernoulli(double p) + * \param p the probability of success. + * \param n the number of values to sample. + * \return a vector of n random numbers. + */ +std::vector rand_bernoulli(double p, size_t n) ; + + +/*! + * \brief Generates a random number from a + * Normal distribution of mean m and standard + * deviation sd. + * \param m the mean. + * \param sd the standard deviation. + * \return a random number. + */ +double rand_normal(double m, double sd) ; + + +/*! + * \brief Generates n random numbers from a + * Normal distribution of mean m and standard + * deviation sd. + * More efficient for sampling than + * rand_normal(double m, double sd). + * \param m the mean. + * \param sd the standard deviation. + * \param n the number of values to sample. + * \return a vector of n random numbers. + */ +std::vector rand_normal(double m, double sd, size_t n) ; + + +/*! Generates a real random number from a uniform + * distribution comprised between min and max. + * \param min the lower limit of the distribution. + * \param max the upper limit of the distribution. + * \return a random number. + */ +template +T rand_real_uniform(T min, T max) +{ std::uniform_real_distribution dist(min, max) ; + return dist(getRandomGenerator()) ; +} + + +/*! Generates n real random numbers from a uniform + * distribution comprised between min and max. + * \param min the lower limit of the distribution. + * \param max the upper limit of the distribution. + * \param n the number of value to sample. + * \return a vector of n random number. + */ +template +std::vector rand_real_uniform(T min, T max, size_t n) +{ + assert(n > 0) ; + + std::vector vector(n) ; + std::uniform_real_distribution dist(min, max) ; + + for(size_t i=0; i +T rand_int_uniform(T min, T max) +{ std::uniform_int_distribution dist(min, max) ; + return dist(getRandomGenerator()) ; +} + + +/*! Generates n random integers from a uniform + * distribution comprised between min and max. + * \param min the lower limit of the distribution. + * \param max the upper limit of the distribution. + * \param n the number of value to sample. + * \return a vector of n random number. + */ +template +std::vector rand_int_uniform(T min, T max, size_t n) +{ + assert(n > 0) ; + + std::vector vector(n) ; + std::uniform_int_distribution dist(min, max) ; + + for(size_t i=0; i +#include + + +/*! + * \brief Initialise and randomly seeds a 32-bit Mesrenne Twister random + * number generator and returns it. + * Initialisation and seeding are static thus from the first time this + * function is called, it will ALWAYS return the same generator. + * \param seed a value to seed the random generator with. If seed is 0, + * then the generator is seeded using a random generator. + * \return ALWAYS THE SAME random number generator. + */ +std::mt19937& getRandomGenerator(std::string seed="") ; + + +#endif diff --git a/src/Statistics/Statistics.cpp b/src/Statistics/Statistics.cpp new file mode 100644 index 0000000..3530943 --- /dev/null +++ b/src/Statistics/Statistics.cpp @@ -0,0 +1,26 @@ +#include +#include // M_PI, pow(), sqrt(), log(), lgamma() +#include // beta_distribution +#include + + +double normal_pmf(double x, double mean, double sd) +{ static double pi_2 = 2.*M_PI ; + return ( 1. / ( sd * sqrt(pi_2) )) * exp(-0.5 * pow((x-mean)/sd, 2.0 ) ); +} + +double poisson_pmf(int x, double lambda) +{ if(x+ lambda == 0) + { return 1. ; } + return exp(x * log(lambda) - lgamma(x + 1.0) - lambda); +} + +double beta_pmf(double x, double alpha, double beta) +{ + boost::math::beta_distribution<> beta_dist(alpha, beta) ; + double y = quantile(beta_dist, x) ; + + return y ; +} + + diff --git a/src/Statistics/Statistics.hpp b/src/Statistics/Statistics.hpp new file mode 100755 index 0000000..fdc9808 --- /dev/null +++ b/src/Statistics/Statistics.hpp @@ -0,0 +1,291 @@ +#ifndef STATISTCS_HPP +#define STATISTICS_HPP + +#include +#include +#include // pow(), sqrt() +#include +#include + +/*! + * \brief Computes the density of x given a gaussian + * distribution with the given mean and standard + * deviation parameters. + * \param x the value of interest. + * \param mean the gaussian mean parameter value. + * \param sd the gaussian standard deviation value. + * \return the probability of x p(x|N(mean,sd)) + */ +double normal_pmf(double x, double mean, double sd) ; + +/*! + * \brief Computes the probability of x given a Poisson + * distribution with the given lambda parameter. + * \param x the value of interest. + * \param lambda the lambda parameter. + * \return the probability of the value given the + * distribution. + */ +double poisson_pmf(int x, double lambda) ; + + +/*! + * \brief Computes the probability of x given a Beta + * distribution with the given alpha and beta parameters. + * \param x the value of interest + * \param alpha the alpha parameter. + * \param beta the beta parameter. + * \return the probability of the value given the + * distribution. + */ +double beta_pmf(double x, double alpha, double beta) ; + + +/*! + * \brief Computes the weighted mean of a vector of measures given their + * probability

. The sum of

is expected to be 1, if not it will + * be normalized. + * \param x a vector of measures. + * \param p a vector of probability associated to each element of . + * \param from the starting index, if different from -1. + * \param to the ending index (not included), if different from -1. + * \return the mean. + */ +template +inline double mean(const std::vector& x, const std::vector& p) ; + +/*! + * \brief Computes the standard deviation of a vector of measures given + * their probability

. + * \param x a vector of measures. + * \param p a vector of probability associated to each element of . + * \param unbiased whether the unbiased standard deviation should be returned + * (the square root of the variance is divided by a normalizing factor). + * \return the standard deviation. + */ +template +inline double sd(const std::vector& x, const std::vector& p, bool unbiased=true) ; + + +/*! \brief Computes the unbiased standard deviation of a vector of + * measures given their probability

. + * \param x a vector of measures. + * \param p a vector of probability associated to each element of . + * \return the unbiased standard deviation. + */ +template +inline double sd_unbiased(const std::vector &x, const std::vector &p) ; + +/*! \brief Computes the biased (non-normalized) standard deviation of a vector of + * measures given their probability

. + * \param x a vector of measures. + * \param p a vector of probability associated to each element of . + * \return the biased standard deviation. + */ +template +inline double sd_biased(const std::vector &x, const std::vector &p) ; + + +/*! + * \brief Computes the pearson correlation coefficient for two given vectors + * using v1[from1-->to1) and v2[from2-->to2). If from1, to1, from2, to2 are equal + * to -1, they are not accounted and the first and last elements considered are + * the beginning and the end of the vectors. + * \param v1 the first vector. + * \param v2 the second vector. + * \param from1 the index of the first value to use in the first vector. + * \param to1 the index after the last value to use in the fist vector. + * \param from2 the index of the first value to use in the second vector. + * \param to2 the index after the last value to use in the second vector. + * \return the correlation coefficient + */ +template +inline double cor_pearson(const std::vector& v1, + const std::vector& v2, + int from1=-1, int to1=-1, + int from2=-1, int to2=-1) ; + +/*! + * \brief Computes the pearson correlation coefficient for two given vectors + * using v1[from1-->to1) and v2(to2<--from2] (the second vector is read backward, + * from the end). + * If from1, to1, from2, to2 are equal to -1, they are not accounted and the first + * and last elements considered are the beginning and the end of the vectors (for + * the last element considered in v2 to be the 1st of the vector (the 0th), to2 + * should be set to -1, which is ignored. However this enables the default behaviour + * using the 0th element as the last one). + * \param v1 the first vector. + * \param v2 the second vector. + * \param from1 the index of the first value to use in the first vector. + * \param to1 the index after the last value to use in the fist vector. + * \param from2 the index of the first value to use in the second vector. + * \param to2 the index after the last value to use in the second vector. + * \return the correlation coefficient + */ +template +inline double cor_pearson_rev(const std::vector& v1, + const std::vector& v2, + int from1=-1, int to1=-1, + int from2=-1, int to2=-1) ; + +template +inline double mean(const std::vector& x, const std::vector& p) +{ + assert(x.size() == p.size()) ; + + double mean = 0. ; + double total = 0. ; + for(auto i : p) + { total += i ; } + for(size_t i=0; i +inline double sd(const std::vector& x, const std::vector& p, bool unbiased) +{ if(unbiased) + { return sd_unbiased(x, p) ; } + else + { return sd_biased(x,p) ; } +} + +template +inline double sd_unbiased(const std::vector& x, const std::vector& p) +{ + assert(x.size() == p.size()) ; + + double v1 = 0. ; + double v2 = 0. ; + double m = mean(x,p) ; + double sd = 0. ; + double total = 0. ; + for(auto i : p) + { total += i ; } + for(size_t i=0; i +inline double sd_biased(const std::vector& x, const std::vector& p) +{ + assert(x.size() == p.size()) ; + + double m = mean(x,p) ; + double sd = 0. ; + double total = 0. ; + for(auto i : p) + { total += i ; } + for(size_t i=0; i +inline double cor_pearson(const std::vector& v1, + const std::vector& v2, + int from1, int to1, + int from2, int to2) +{ + if(from1 == -1) + { from1 = 0 ; } + if(to1 == -1) + { to1 = v1.size() ; } + if(from2 == -1) + { from2 = 0 ; } + if(to2 == -1) + { to2 = v2.size() ; } + + // 0 <= from < to < v.size() + // if from == to -> no sense, no value : return nan + // if from > to -> no sense, backward : does a segfault + assert(from1 >= 0) ; + assert(from1 < to1) ; + assert(to1 <= v1.size()) ; + assert(from2 >= 0) ; + assert(from2 < to2) ; + assert(to2 <= v2.size()) ; + + // the terms of the formula + double sum_v1v2 = 0. ; + double sum_v1 = 0. ; + double sum_v2 = 0. ; + double sum_v1_2 = 0. ; + double sum_v2_2 = 0. ; + // the effective number of values considered + double n = to1 - from1 ; + + for(size_t i1=from1, i2=from2; i1 +inline double cor_pearson_rev(const std::vector& v1, + const std::vector& v2, + int from1, int to1, + int from2, int to2) +{ + if(from1 == -1) + { from1 = 0 ; } + if(to1 == -1) + { to1 = v1.size() ; } + if(from2 == -1) + { from2 = v2.size() - 1 ; } + if(to2 == -1) + { to2 = 0 ; } + + // beware -> for v1 [from1, to1) + // for v2 [to2, from2] + // use from1 and to1 for the loop conditions + + // 0 <= from < to < v1.size() + // 0 <= to2 < from2 < v2.size() because for v2 [to2, from2] + // if from == to -> no sense, no value : return nan + // if from > to -> no sense, backward : does a segfault + assert(from1 >= 0) ; + assert(from1 < to1) ; + assert(to1 <= v1.size()) ; + assert(to2 >= 0) ; + assert(to2 < from2) ; + assert(from2 < v2.size()) ; + + // the terms of the formula + double sum_v1v2 = 0. ; + double sum_v1 = 0. ; + double sum_v2 = 0. ; + double sum_v1_2 = 0. ; + double sum_v2_2 = 0. ; + // the effective number of values considered + double n = to1 - from1 ; + + for(int i1=from1, i2=from2; i1 +#include // accumulate() + + +#include +#include +#include +#include + +/*! + * \brief Given a matrix and an offset, this methods converts + * the offset into a coordinates vector (row, col, ...). It is + * a simple copy/paste of Matrix::convert_to_coord() which is + * private. + * \param m a matrix. + * \param offset an offset + * \return a vector of coordinates (row,col,...) corresponding to + * the offset for the given matrix. + */ +std::vector convert_to_coord(const Matrix& m, size_t offset) +{ + std::vector dim = m.get_dim() ; // (row, col, ...) format + if(dim.size() > 1) + { std::swap(dim[0], dim[1]) ; } // (x,y,...) format + + std::vector coord(dim.size(), 0) ; + std::vector dim_prod(dim.size(), 0) ; + dim_prod[0] = 1 ; + if(dim.size() > 1) + { dim_prod[1] = dim[0] ; } + if(dim.size() > 2) + { for(size_t i=2; i=0; i--) + { size_t c = offset / dim_prod[i] ; + coord[i] = c ; + offset -= (dim_prod[i]*c) ; + } + + if(dim.size() > 1) + { std::swap(coord[0], coord[1]) ; } // (row,col,...) format + return coord ; +} + +// Matrix test suite +SUITE(Matrix) +{ // displays message + TEST(message) + { std::cout << "Starting Matrix tests..." << std::endl ; } + + // tests normal constructor + TEST(constructor) + { + std::vector dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + CHECK_EQUAL(dim_1.size(), m1.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim_1, m1.get_dim(), dim_1.size()) ; + CHECK_EQUAL(data_size_1, m1.get_data_size()) ; + + // always has a zero dimension : 0 / 0x1 / 0x1x2/ ... / 0x1x...x10 + Matrix m2(dim_2) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + CHECK_EQUAL(dim_2.size(), m2.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim_2, m2.get_dim(), dim_2.size()) ; + CHECK_EQUAL(data_size_2, m2.get_data_size()) ; + CHECK_EQUAL(data_size_2, m2.get_data().size()) ; + + // is a 0 dimension matrix : 0 / 0x0 / 0x0x...x0 + Matrix m3(dim_3) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + CHECK_EQUAL(dim_3.size(), m3.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim_3, m3.get_dim(), dim_3.size()) ; + CHECK_EQUAL(data_size_3, m3.get_data_size()) ; + CHECK_EQUAL(data_size_3, m3.get_data().size()) ; + } + + } + + // tests contructor with value + TEST(constructor_value) + { + std::vector dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + CHECK_EQUAL(dim_1.size(), m1.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim_1, m1.get_dim(), dim_1.size()) ; + CHECK_EQUAL(data_size_1, m1.get_data_size()) ; + for(const auto x : m1.get_data()) + { CHECK_EQUAL(i, x) ; } + + // always has a zero dimension : 0 / 0x1 / 0x1x2/ ... / 0x1x...x10 + Matrix m2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + CHECK_EQUAL(dim_2.size(), m2.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim_2, m2.get_dim(), dim_2.size()) ; + CHECK_EQUAL(data_size_2, m2.get_data_size()) ; + CHECK_EQUAL(data_size_2, m2.get_data().size()) ; + for(const auto x : m2.get_data()) + { CHECK_EQUAL(i, x) ; } + + // is a 0 dimension matrix : 0 / 0x0 / 0x0x...x0 + Matrix m3(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + CHECK_EQUAL(dim_3.size(), m3.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim_3, m3.get_dim(), dim_3.size()) ; + CHECK_EQUAL(data_size_3, m3.get_data_size()) ; + CHECK_EQUAL(data_size_3, m3.get_data().size()) ; + for(const auto x : m3.get_data()) + { CHECK_EQUAL(i, x) ; } + } + } + + // tests the get() method, compare a value get with offset with the value get with coordinates + // (computed from offset) + TEST(get) + { + std::vector dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + Matrix m1_2(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m1_2(m1) ; + CHECK_EQUAL(true, m1 == m1_2) ; + + // always has a zero dimension : 0 / 0x1 / 0x1x2/ ... / 0x1x...x10 + Matrix m2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2_2(m2) ; + CHECK_EQUAL(true, m2 == m2_2) ; + + + // is a 0 dimension matrix : 0 / 0x0 / 0x0x...x0 + Matrix m3(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3_2(m3) ; + CHECK_EQUAL(true, m3 == m3_2) ; + } + } + + // tests the () operator + TEST(parenthesis_operator) + { + std::vector dim_1, dim_2, dim_3 ; + size_t data_size_1, data_size_2, data_size_3 ; + + // from 0D to 10D + for(size_t i=1; i<11; i++) + { + dim_1.push_back(i+1) ; + dim_2.push_back(i) ; + dim_3.push_back(0) ; + + // has non-0 dimensions : 1 /1x2 / 1x2x3 / ... / 1x2x...x11 + Matrix m1(dim_1, i) ; + data_size_1 = std::accumulate(dim_1.begin(), dim_1.end(), 1, std::multiplies()) ; + for(size_t j=0; j m2(dim_2, i) ; + Matrix m2_2(dim_2, i) ; + data_size_2 = std::accumulate(dim_2.begin(), dim_2.end(), 1, std::multiplies()) ; + for(size_t j=0; j m3(dim_3, i) ; + Matrix m3_2(dim_3, i) ; + data_size_3 = std::accumulate(dim_3.begin(), dim_3.end(), 1, std::multiplies()) ; + for(size_t j=0; j dim = {i,j} ; + Matrix2D m(i,j) ; + CHECK_EQUAL(dim.size(), m.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; + CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), + m.get_data_size()) ; + } + } + } + + // tests contructor with value + TEST(constructor_value) + { int n = 999 ; + for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { std::vector dim = {i,j} ; + Matrix2D m(i,j,n) ; + CHECK_EQUAL(dim.size(), m.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; + CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), + m.get_data_size()) ; + for(const auto& i : m.get_data()) + { CHECK_EQUAL(n, i) ; } + } + } + } + + // tests the copy constructor + TEST(constructor_copy) + { + for(size_t i=1; i<11; i++) + { std::vector dim ; + + // has non-0 dimensions : 1x2 / 2x3 / ... + dim = {i, i+1} ; + Matrix2D m1(i,i+1) ; + for(size_t j=0; j m1_2(m1) ; + CHECK_EQUAL(true, m1 == m1_2) ; + + // always has a zero dimension : // has a zero dimension : 0x1 / 0x2 / ... + dim = {0, i} ; + Matrix2D m2(0,i) ; + for(size_t j=0; j m2_2(m2) ; + CHECK_EQUAL(true, m2 == m2_2) ; + + // is a 0 dimension matrix : 0x0 + dim = {0, 0} ; + Matrix2D m3(0,0) ; + for(size_t j=0; j m3_2(m3) ; + CHECK_EQUAL(true, m3 == m3_2) ; + } + } + + // tests the get() method, compare a value get with offset with the value get with coordinates + // (computed from offset) + TEST(get) + { + for(size_t i=1; i<11; i++) + { std::vector dim ; + + // has non-0 dimensions : 1x2 / 2x3 / ... + Matrix2D m1(i,i+1, i) ; + dim = {i,i+1} ; + for(size_t j=0; j coord = convert_to_coord(m1, j) ; + CHECK_EQUAL(m1.get(j), m1.get(coord[0], coord[1])) ; } + + // has a zero dimension : 0x1 / 0x2 / ... + Matrix2D m2(0,i,i) ; + dim = {0,i} ; + for(size_t j=0; j coord = convert_to_coord(m2, j) ; + CHECK_EQUAL(m2.get(j), m2.get(coord[0], coord[1])) ; } + + // has zero dimensions : 0x0 + Matrix2D m3(0,0,i) ; + dim = {0,0} ; + for(size_t j=0; j coord = convert_to_coord(m3, j) ; + CHECK_EQUAL(m3.get(j), m3.get(coord[0], coord[1])) ; + } + } + } + + + // test the set() method, set a value and then check it using get() + TEST(set) + { + for(size_t i=1; i<11; i++) + { std::vector dim ; + + // has non-0 dimensions : 1x2 / 2x3 / ... + Matrix2D m1(i,i+1, i) ; + dim = {i,i+1} ; + for(size_t j=0; j coord = convert_to_coord(m1, j) ; + m1.set(coord[0], coord[1], j) ; + } + for(size_t j=0; j m2(0,i,i) ; + dim = {0,i} ; + for(size_t j=0; j coord = convert_to_coord(m2, j) ; + m2.set(coord[0], coord[1], j) ; + } + for(size_t j=0; j m3(0,0,i) ; + dim = {0,0} ; + for(size_t j=0; j coord = convert_to_coord(m3, j) ; + m3.set(coord[0], coord[1], j) ; + } + for(size_t j=0; j m1(i,i+1) ; + CHECK_EQUAL(i, m1.get_nrow()) ; + + // always has a zero dimension : // has a zero dimension : 0x1 / 0x2 / ... + Matrix2D m2(0,i) ; + CHECK_EQUAL(0, m2.get_nrow()) ; + + // is a 0 dimension matrix : 0x0 + Matrix2D m3(0,0) ; + CHECK_EQUAL(0, m3.get_nrow()) ; + } + } + + // tests get_ncol() + TEST(get_ncol) + { for(size_t i=1; i<11; i++) + { + // has non-0 dimensions : 1x2 / 2x3 / ... + Matrix2D m1(i,i+1) ; + CHECK_EQUAL(i+1, m1.get_ncol()) ; + + // always has a zero dimension : // has a zero dimension : 0x1 / 0x2 / ... + Matrix2D m2(0,i) ; + CHECK_EQUAL(i, m2.get_ncol()) ; + + // is a 0 dimension matrix : 0x0 + Matrix2D m3(0,0) ; + CHECK_EQUAL(0, m3.get_ncol()) ; + } + } + + // tests get_row() + TEST(get_row) + { for(size_t i=0; i<11; i++) + { + Matrix2D m(5,i) ; + for(size_t j=0; j row(m.get_ncol()) ; + for(size_t n=0, k=j*m.get_ncol(); n m(i,5) ; + for(size_t j=0; j col(m.get_nrow()) ; + for(size_t n=0, k=j; n m(5,i) ; + for(size_t j=0; j new_row(i, 999) ; + m.set_row(j, new_row) ; + CHECK_EQUAL(i, m.get_row(j).size()) ; + CHECK_ARRAY_EQUAL(new_row, m.get_row(j), new_row.size()) ; + } + + CHECK_THROW(m.set_row(9999, std::vector(i,0)), std::out_of_range) ; + CHECK_THROW(m.set_row(0, std::vector(i+1,0)), std::invalid_argument) ; + } + } + + // tests set_col() + TEST(set_col) + { for(size_t i=0; i<11; i++) + { + Matrix2D m(i,5) ; + for(size_t j=0; j new_col(i, 999) ; + m.set_col(j, new_col) ; + CHECK_EQUAL(i, m.get_col(j).size()) ; + CHECK_ARRAY_EQUAL(new_col, m.get_col(j), new_col.size()) ; + } + CHECK_THROW(m.set_col(9999, std::vector(i,0)), std::out_of_range) ; + CHECK_THROW(m.set_col(0, std::vector(i+1,0)), std::invalid_argument) ; + } + } + + TEST(parenthesis_operator) + { for(size_t i=1; i<11; i++) + { std::vector dim ; + + // has non-0 dimensions : 1x2 / 2x3 / ... + Matrix2D m1(i,i+1, i) ; + dim = {i,i+1} ; + for(size_t j=0; j coord = convert_to_coord(m1, j) ; + m1(coord[0], coord[1]) = j ; + } + for(size_t j=0; j m2(0,i,i) ; + dim = {0,i} ; + for(size_t j=0; j coord = convert_to_coord(m2, j) ; + m2(coord[0], coord[1]) = j ; + } + for(size_t j=0; j m3(0,0,i) ; + dim = {0,0} ; + for(size_t j=0; j coord = convert_to_coord(m3, j) ; + m3(coord[0], coord[1]) = j ; + } + for(size_t j=0; j> v_int({{0,1,2,3},{4,5,6,7}}) ; + std::vector> v_char({{'A','A','A'},{'C','C','C'}, + {'G','G','G'},{'T','T','T'}}) ; + std::vector> v_double({{0.,1.,2.,3.},{4.,5.,6.,7.}}) ; + + Matrix2D m_int(2,4) ; m_int.set_row(0, {0,1,2,3}) ; m_int.set_row(1, {4,5,6,7}) ; + Matrix2D m_char(4,3) ; m_char.set_row(0, {'A','A','A'}) ; m_char.set_row(1, {'C','C','C'}) ; + m_char.set_row(2, {'G','G','G'}) ; m_char.set_row(3, {'T','T','T'}) ; + Matrix2D m_dbl(2,4) ; m_dbl.set_row(0, {0.,1.,2.,3.}) ; m_dbl.set_row(1, {4.,5.,6.,7.}) ; + + // matrix of int + Matrix2D m_int1(file_int1) ; // this one is perfect + Matrix2D m_int2(file_int2) ; // this one has inhomogeneous spaceers but is OK + + CHECK_EQUAL(m_int, m_int1) ; + CHECK_EQUAL(m_int, m_int2) ; + + // matrix with only 1 int + Matrix2D m_int3(file_int7) ; + CHECK_EQUAL( Matrix2D(1,1,1), m_int3) ; + + // empty matrix (empty file) + Matrix2D m_int4(file_int8) ; + CHECK_EQUAL(Matrix2D(0,0), m_int4) ; + + // empty matrix (only eol in file) + Matrix2D m_int5(file_int9) ; + CHECK_EQUAL(Matrix2D(0,0), m_int5) ; + + // these files are not well formatted + CHECK_THROW(m_int2 = Matrix2D(file_int3), std::runtime_error) ; // data are inhomogeneous + CHECK_THROW(m_int2 = Matrix2D(file_int4), std::runtime_error) ; // empty line + CHECK_THROW(m_int2 = Matrix2D(file_int5), std::runtime_error) ; // empty line + CHECK_THROW(m_int2 = Matrix2D(file_int6), std::runtime_error) ; // empty line + + // matrix of char + Matrix2D m_char1(file_char1) ; + CHECK_EQUAL(m_char, m_char1) ; + + // matrix of double + Matrix2D m_dbl1(file_double1) ; + CHECK_EQUAL(m_dbl, m_dbl1) ; + + // file does not exist + CHECK_THROW(Matrix2D m_int2(file_ghost), std::runtime_error) ; + } + + // tests file format, writting a matrix and reading it should return the + // same matrix, uses set() and the == operator + // loading an empty file is not allowed (has no meaning, the file is empty) + TEST(file_format) + { for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { Matrix2D m(i,j) ; + for(size_t a=0; a m2("./src/Unittests/data/matrix2d_out.mat") ; + // any matrix with at least one zero dimension is a null + // matrix + if(i==0 or j==0) + { CHECK_EQUAL(Matrix2D(0,0), m2) ; } + else + { CHECK_EQUAL(m, m2) ; } + } + } + } +} + + + + +SUITE(Matrix3D) +{ // displays message + TEST(message) + { std::cout << "Starting Matrix3D tests..." << std::endl ; } + + + // tests constructor + TEST(constructor) + { for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { std::vector dim = {i,j,k} ; + Matrix3D m(i,j,k) ; + CHECK_EQUAL(dim.size(), m.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; + CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), + m.get_data_size()) ; + } + } + } + } + + // test constructor value + TEST(constructor_value) + { int n = 999 ; + for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { std::vector dim = {i,j,k} ; + Matrix3D m(i,j,k,n) ; + CHECK_EQUAL(dim.size(), m.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; + CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), + m.get_data_size()) ; + for(const auto& i : m.get_data()) + { CHECK_EQUAL(n, i) ; } + } + } + } + } + + // tests copy constructor + TEST(constructor_copy) + { int n = 999 ; + for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { std::vector dim = {i,j,k} ; + Matrix3D m(i,j,k,n) ; + Matrix3D m2(m) ; + CHECK_EQUAL(m, m2) ; + } + } + } + } + + // tests contructor from file, uses the == operator + TEST(constructor_file) + { std::string file_int1("./src/Unittests/data/matrix3d_int1.mat") ; + std::string file_int2("./src/Unittests/data/matrix3d_int2.mat") ; + std::string file_int3("./src/Unittests/data/matrix3d_int3.mat") ; + std::string file_int4("./src/Unittests/data/matrix3d_int4.mat") ; + std::string file_int5("./src/Unittests/data/matrix3d_int5.mat") ; + std::string file_int6("./src/Unittests/data/matrix3d_int6.mat") ; + std::string file_int7("./src/Unittests/data/matrix3d_int7.mat") ; + std::string file_int8("./src/Unittests/data/matrix3d_int8.mat") ; + std::string file_int9("./src/Unittests/data/matrix3d_int9.mat") ; + std::string file_int10("./src/Unittests/data/matrix3d_int10.mat") ; + std::string file_int11("./src/Unittests/data/matrix3d_int11.mat") ; + std::string file_int12("./src/Unittests/data/matrix3d_int12.mat") ; + std::string file_int13("./src/Unittests/data/matrix3d_int13.mat") ; + std::string file_int14("./src/Unittests/data/matrix3d_int14.mat") ; + std::string file_double("./src/Unittests/data/matrix3d_double.mat") ; + std::string file_ghost("./src/Unittests/data/foo.mat") ; + + + std::vector v_int = {-1,0,2,0, + 0,3,0,4, + 0,0,0,0, + 0,0,0,0, + 0,5,-6,0, + 0,7,0,0} ; + + std::vector v_int2 = {1} ; + + std::vector v_dbl = {-1.,0., 2.,0., + 0.,3., 0.,4., + 0.,0., 0.,0., + 0.,0., 0.,0., + 0.,5.,-6.,0., + 0.,7., 0.,0.} ; + + std::vector dim = {2,4,3} ; + std::vector dim2 = {1,1,1} ; + + // matrix of int + Matrix3D m_int(file_int1) ; + CHECK_EQUAL(dim.size(), m_int.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m_int.get_dim(), dim.size()) ; + CHECK_EQUAL(v_int.size(), m_int.get_data_size()) ; + CHECK_ARRAY_EQUAL(v_int, m_int.get_data(), v_int.size()) ; + + // matrix with only 1 int + Matrix3D m_int2(file_int12) ; + CHECK_EQUAL(Matrix3D(1,1,1,1), m_int2) ; + + // empty matrix (empty file) + Matrix3D m_int3(file_int13) ; + CHECK_EQUAL(Matrix3D(0,0,0), m_int3) ; + + // empty matrix (only eol in file) + Matrix3D m_int4(file_int13) ; + CHECK_EQUAL(Matrix3D(0,0,0), m_int4) ; + + // these files are not well formatted + CHECK_THROW(Matrix3D m_int3(file_int2), std::runtime_error) ; // mixed data types + CHECK_THROW(Matrix3D m_int3(file_int3), std::runtime_error) ; // slice of variable dim + CHECK_THROW(Matrix3D m_int3(file_int4), std::runtime_error) ; // slice of variable dim + CHECK_THROW(Matrix3D m_int3(file_int5), std::runtime_error) ; // slice of variable dim + CHECK_THROW(Matrix3D m_int3(file_int6), std::runtime_error) ; // empty line + CHECK_THROW(Matrix3D m_int3(file_int7), std::runtime_error) ; // empty line + CHECK_THROW(Matrix3D m_int3(file_int8), std::runtime_error) ; // empty line + CHECK_THROW(Matrix3D m_int3(file_int9), std::runtime_error) ; // empty line + CHECK_THROW(Matrix3D m_int3(file_int10), std::runtime_error) ; // empty line + CHECK_THROW(Matrix3D m_int3(file_int11), std::runtime_error) ; // empty line + + // this file does not exist + CHECK_THROW(Matrix3D m_int3(file_ghost), std::runtime_error) ; + + // matrix of double + Matrix3D m_double(file_double) ; + CHECK_EQUAL(dim.size(), m_double.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m_double.get_dim(), dim.size()) ; + CHECK_EQUAL(v_int.size(), m_double.get_data_size()) ; + CHECK_ARRAY_EQUAL(v_int, m_double.get_data(), v_int.size()) ; + } + + // tests get() + TEST(get) + { int n = 999 ; + for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { std::vector dim = {i,j,k} ; + Matrix3D m(i,j,k,n) ; + for(size_t l=0; l coord = convert_to_coord(m, l) ; + CHECK_EQUAL(m.get(l), m.get(coord[0], coord[1], coord[2])) ; + } + } + } + } + } + + // tests set() + TEST(set) + { int n = 999 ; + for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { std::vector dim = {i,j,k} ; + Matrix3D m(i,j,k,n) ; + for(size_t l=0; l coord = convert_to_coord(m, l) ; + m.set(coord[0], coord[1], coord[2], l) ; + } + for(size_t l=0; l dim = {i,j,k} ; + Matrix3D m(i,j,k,n) ; + for(size_t l=0; l coord = convert_to_coord(m, l) ; + m(coord[0], coord[1], coord[2]) = l ; + } + for(size_t l=0; l m(i,j,k) ; + for(size_t a=0; a m2("./src/Unittests/data/matrix3d_out.mat") ; + // any matrix with at least one zero dimension is a null + // matrix + if(i==0 or j==0 or k==0) + { CHECK_EQUAL(Matrix3D(0,0,0), m2) ; } + else + { CHECK_EQUAL(m, m2) ; } + } + } + } + } +} + + +SUITE(Matrix4D) +{ + // displays message + TEST(message) + { std::cout << "Starting Matrix4D tests..." << std::endl ; } + + // constructor + TEST(constructor) + { for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { for(size_t l=0; l<10; l++) + { std::vector dim = {i,j,k,l} ; + Matrix4D m(i,j,k,l) ; + CHECK_EQUAL(dim.size(), m.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; + CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), + m.get_data_size()) ; + } + } + } + } + } + + // test constructor value + TEST(constructor_value) + { int n = 999 ; + for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { for(size_t l=0; l<10; l++) + { std::vector dim = {i,j,k,l} ; + Matrix4D m(i,j,k,l,n) ; + CHECK_EQUAL(dim.size(), m.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m.get_dim(), dim.size()) ; + CHECK_EQUAL(std::accumulate(begin(dim), end(dim), 1, std::multiplies()), + m.get_data_size()) ; + for(const auto& i : m.get_data()) + { CHECK_EQUAL(n, i) ; } + } + } + } + } + } + + // tests copy constructor + TEST(constructor_copy) + { int n = 999 ; + for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { for(size_t l=0; l<10; l++) + { std::vector dim = {i,j,k,l} ; + Matrix4D m(i,j,k,l,n) ; + Matrix4D m2(m) ; + CHECK_EQUAL(m, m2) ; + } + } + } + } + } + + // tests contructor from file, uses the == operator + TEST(constructor_file) + { + std::string file_int1("./src/Unittests/data/matrix4d_int1.mat") ; + std::string file_int2("./src/Unittests/data/matrix4d_int2.mat") ; + std::string file_int3("./src/Unittests/data/matrix4d_int3.mat") ; + std::string file_int4("./src/Unittests/data/matrix4d_int4.mat") ; + std::string file_int5("./src/Unittests/data/matrix4d_int5.mat") ; + std::string file_int6("./src/Unittests/data/matrix4d_int6.mat") ; + std::string file_int7("./src/Unittests/data/matrix4d_int7.mat") ; + std::string file_int8("./src/Unittests/data/matrix4d_int8.mat") ; + std::string file_int9("./src/Unittests/data/matrix4d_int9.mat") ; + std::string file_int10("./src/Unittests/data/matrix4d_int10.mat") ; + std::string file_int11("./src/Unittests/data/matrix4d_int11.mat") ; + std::string file_int12("./src/Unittests/data/matrix4d_int12.mat") ; + std::string file_int13("./src/Unittests/data/matrix4d_int13.mat") ; + std::string file_int14("./src/Unittests/data/matrix4d_int14.mat") ; + std::string file_int15("./src/Unittests/data/matrix4d_int15.mat") ; + std::string file_int16("./src/Unittests/data/matrix4d_int16.mat") ; + std::string file_int17("./src/Unittests/data/matrix4d_int17.mat") ; + std::string file_int18("./src/Unittests/data/matrix4d_int18.mat") ; + std::string file_int19("./src/Unittests/data/matrix4d_int19.mat") ; + std::string file_int20("./src/Unittests/data/matrix4d_int20.mat") ; + std::string file_dbl1("./src/Unittests/data/matrix4d_double1.mat") ; + std::string file_ghost("./src/Unittests/data/foo.mat") ; + + + std::vector v_int = { 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10,11,12, + 13,14,15, + 16,17,18, + 19,20,21, + 22,23,24, + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10,11,12, + 13,14,15, + 16,17,18, + 19,20,21, + 22,23,24} ; + + std::vector v_dbl = { 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10,11,12, + 13,14,15, + 16,17,18, + 19,20,21, + 22,23,24, + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10,11,12, + 13,14,15, + 16,17,18, + 19,20,21, + 22,23,24} ; + + std::vector dim = {2,3,2,4} ; + + // matrix of int + Matrix4D m_int(file_int1) ; + CHECK_EQUAL(dim.size(), m_int.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m_int.get_dim(), dim.size()) ; + CHECK_EQUAL(v_int.size(), m_int.get_data_size()) ; + CHECK_ARRAY_EQUAL(v_int, m_int.get_data(), v_int.size()) ; + + // matrix with only 1 int + Matrix4D m_int2(file_int18) ; + CHECK_EQUAL(Matrix4D(1,1,1,1,1), m_int2) ; + + // empty matrix (empty file) + Matrix4D m_int3(file_int19) ; + CHECK_EQUAL(Matrix4D(0,0,0,0), m_int3) ; + + // empty matrix (only eol in file) + Matrix4D m_int4(file_int20) ; + CHECK_EQUAL(Matrix4D(0,0,0,0), m_int3) ; + + + // these files are not well formatted + CHECK_THROW(Matrix4D m_int5(file_int2), std::runtime_error) ; // empty lines + CHECK_THROW(Matrix4D m_int5(file_int3), std::runtime_error) ; // empty lines + CHECK_THROW(Matrix4D m_int5(file_int4), std::runtime_error) ; // empty lines + CHECK_THROW(Matrix4D m_int5(file_int5), std::runtime_error) ; // empty lines + CHECK_THROW(Matrix4D m_int5(file_int6), std::runtime_error) ; // empty lines + CHECK_THROW(Matrix4D m_int5(file_int7), std::runtime_error) ; // first line problem + CHECK_THROW(Matrix4D m_int5(file_int8), std::runtime_error) ; // first line problem + CHECK_THROW(Matrix4D m_int5(file_int9), std::runtime_error) ; // first line problem + CHECK_THROW(Matrix4D m_int5(file_int10), std::runtime_error) ; // second line problem + CHECK_THROW(Matrix4D m_int5(file_int11), std::runtime_error) ; // extra column + CHECK_THROW(Matrix4D m_int5(file_int12), std::runtime_error) ; // missing column + CHECK_THROW(Matrix4D m_int5(file_int13), std::runtime_error) ; // extra row + CHECK_THROW(Matrix4D m_int5(file_int14), std::runtime_error) ; // extra 2d slice + CHECK_THROW(Matrix4D m_int5(file_int15), std::runtime_error) ; // extra 2d slice + CHECK_THROW(Matrix4D m_int5(file_int16), std::runtime_error) ; // last line problem + CHECK_THROW(Matrix4D m_int5(file_int17), std::runtime_error) ; // mixded data types + + // this file does not exist + CHECK_THROW(Matrix4D m_int3(file_ghost), std::runtime_error) ; + + // matrix of double + Matrix4D m_dbl(file_dbl1) ; + CHECK_EQUAL(dim.size(), m_dbl.get_dim_size()) ; + CHECK_ARRAY_EQUAL(dim, m_dbl.get_dim(), dim.size()) ; + CHECK_EQUAL(v_dbl.size(), m_dbl.get_data_size()) ; + CHECK_ARRAY_EQUAL(v_dbl, m_dbl.get_data(), v_dbl.size()) ; + } + + // tests get() + TEST(get) + { int n = 999 ; + for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { for(size_t l=0; l<10; l++) + { std::vector dim = {i,j,k,l} ; + Matrix4D m(i,j,k,l,n) ; + for(size_t a=0; a coord = convert_to_coord(m, a) ; + CHECK_EQUAL(m.get(a), m.get(coord[0], coord[1], coord[2], coord[3])) ; + } + } + } + } + } + } + + // tests set() + TEST(set) + { int n = 999 ; + for(size_t i=0; i<10; i++) + { for(size_t j=0; j<10; j++) + { for(size_t k=0; k<10; k++) + { for(size_t l=0; l<10; l++) + { std::vector dim = {i,j,k,l} ; + Matrix4D m(i,j,k,n) ; + for(size_t a=0; a coord = convert_to_coord(m, a) ; + m.set(coord[0], coord[1], coord[2], coord[3], a) ; + } + for(size_t a=0; a dim = {i,j,k,l} ; + Matrix4D m(i,j,k,l) ; + for(size_t a=0; a m2("./src/Unittests/data/matrix4d_out.mat") ; + // any matrix with at least one zero dimension is a null + // matrix + if(i==0 or j==0 or k==0 or l==0) + { CHECK_EQUAL(Matrix4D(0,0,0,0), m2) ; } + else + { CHECK_EQUAL(m, m2) ; } + } + } + } + } + } +} + diff --git a/src/Utility/Vector_utility.hpp b/src/Utility/Vector_utility.hpp new file mode 100755 index 0000000..1cb4784 --- /dev/null +++ b/src/Utility/Vector_utility.hpp @@ -0,0 +1,82 @@ +#ifndef VECTOR_UTILITY_HPP +#define VECTOR_UTILITY_HPP + +#include +#include +#include + + +/*! + * \brief Overload the << operator for vectors. + * \param out an output stream. + * \param v a vector of interest. + * \param sep a character to separate the values. + * \return a reference to the output stream. + */ +template +std::ostream& operator<<(std::ostream& out, const std::vector& v) +{ for(auto i:v) + { out << i << " " ; } + return out ; +} + + +/*! + * \brief A method to print a vector. Unlike the << operator, this + * function allows to specify the separator character. + * \param out an output stream. + * \param v a vector of interest. + * \param sep a character to separate the values. + */ +template +void print_vector(std::ostream& out, const std::vector& v, char sep=' ') +{ for(auto i:v) + { out << i << sep ; } +} + + +/*! + * \brief Given a vector and an offset, this function pads (shifts) + * the vector content by adding values (0 by default) on one side of + * the vector but keeping the size of the vector constant. + * \param v the vector. + * \param offset the offset, negative means padding to the left, positive + * means padding to the right. + * \param value the value to use for the padding. + */ +template +void pad_vector(std::vector& v, int offset, T value=0.) +{ + int from = 0 ; + int to = 0 ; + size_t len = v.size() ; + + assert((offset < v.size()) or (offset > v.size())) ; + + // padding to the left + if(offset < 0) + { from = std::abs(offset) ; + to = static_cast(len-1) ; + for(int i=from; i<=to; i++) + { v[i+offset] = v[i] ; } + // fill freed slots (extreme right) with values + for(size_t i=to+offset+1; i 0) + { from = 0 ; + to = len - 1 - offset ; + for(int i=to; i>=from; i--) + { v[i+offset] = v[i] ; } + // fill freed slots (extreme left) with values + for(size_t i=from; i +#include +#include +#include + + +template +std::ostream& operator << (std::ostream& stream, const std::vector& v) +{ for(const auto& x : v) + { stream << x << " " ; } + stream << std::endl ; + return stream ; +} + +int main() +{ + Matrix4D prob("/local/groux/scATAC-seq/prob.mat4d") ; + + auto dim = prob.get_dim() ; + + std::cout << "dimensions " << dim << std::endl ; + + std::vector sum_row(dim[0], 0.) ; + std::vector sum_col(dim[1], 0.) ; + + for(size_t i=0; i + +#include +#include + +// run all tests +int main() +{ + return UnitTest::RunAllTests(); +} +