setwd(file.path("/", "local", "groux", "scATAC-seq"))

# libraries
library(RColorBrewer)
library(clues)

# functions
source(file.path("scripts", "functions.R"))

#' Performs a hard assigment.
#' Each region is assigned to the
#' class for which it has been classified
#' with the highest probability, over
#' all shift and flip states.
#' \param prob a 4D array containing
#' the posterior probabilities. It has
#' the following dimensions :
#' 1st number of regions
#' 2nd number of classes
#' 3rd number of shifts
#' 4th number of flips
#' \return a vector of labels, 1 per
#' region.
#' \author Romain Groux
hard.assign = function(prob)
{ 
  prob.per.region = apply(prob, c(1,2), sum)
  cluster = apply(prob.per.region, 1, which.max)
  return(cluster)
}

# path to the images for the logo
path.a = file.path("res/A.png")
path.c = file.path("res/C.png")
path.g = file.path("res/G.png")
path.t = file.path("res/T.png") 

# the true labels 5000 CTCF sites and 5000 SP1 sites
true.labels = c(rep(1,1000),
                rep(2,1000))

# the expected dimensionality of the prob array to read
# -1 indicate values that will change (class and shift)
dim  = c(2000, -1, -1, 2)

#  the parameters used to run the EM
n.shifts   = c(90)
n.classes = 1:2

#number of time a classification was repeated
n.repeat  = 50

# where the results are
dir.results = file.path("results",
                        "10xgenomics_PBMC_5k_motifs_classification_7")

# ari values
ari = array(dim=c(length(n.classes),
                  length(n.shifts),
                  n.repeat))

for(i in 1:length(n.classes))
{
  n.class = n.classes[i]
  
  for(j in 1:length(n.shifts))
  {
    n.shift = n.shifts[j]
    
    # update dimensions
    dim[2] = n.class
    dim[3] = n.shift
    
    
    # go over each repetition
    for(k in 1:n.repeat)
    { file.prob = file.path(dir.results,
                            sprintf("ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_%dclass_%dshift_prob_%d.txt", 
                                    n.class, n.shift, k))
      file.motif = file.path(dir.results,
                            sprintf("ctcf_motifs_10e-6_myc_motifs_10e-6_sequences_rmsk_%dclass_%dshift_model_%d.mat", 
                                    n.class, n.shift, k))
      
      # prob = read.arraytxt(file.prob, dim)
      # print(apply(prob, 2, sum)/sum(prob))
      # cluster = hard.assign(prob)
      # ari[i,j,k] = adjustedRand(cluster, true.labels)["HA"]
      
      X11(width=10, height=12)
      par(mfrow=c(2,1))
      motif = read.sequence.models(file.motif)$models
      plot.logo(motif[,,1], path.a, path.c, path.g, path.t)
      plot.logo(motif[,,2], path.a, path.c, path.g, path.t)
    }
  }
}