#genocrunchlib.R

################
# Print message in a formatted way so it can be decomposed into categories of
# logs. Can also be used to issue errors
# Args:
#  (1) label    (character) A label for the message (ex: title, log, warning...)
#  (2) text     (character) Message to print
#  (3) verbose  (character) A statement: 'TRUE' or same value as 'label':print
#               to stdout; 'FALSE':don't print;
# Prints to stdout:
#  Tab-separated: Path to executed script, date and time, 'label' and 'text'
################
log_fp <- NULL
PrintMsg <- function(text='default message', verbose=TRUE, stop=FALSE, log=log_fp) {

  if (!is.null(log)) {
    f <- file(log, open="at")
    sink(file=f, append=TRUE)
    sink(type="message")
  }
  if (as.logical(verbose) == TRUE) {
    cat(text)
  }
  if (!is.null(log)) {
    sink()
    sink(type="message")
    close(f)
  }
  if (as.logical(stop) == TRUE) {
    stop(text)
  }
}

################
# Write a table to an output location, with the option of having indented
# columns name if 'name'=''.
# Args:
#  (1) table      Table object
#  (2) output.fp  Path to the output file
#  (3) name       Name of first column
#  (4) sep        Separator character
#  (5) na         na
#  (6) dec        dec
#  (7) quote      quote
# Outputs:
#  Writes 'table' to 'output.fp', starting first line with an indentation ('\t')
################
WriteTable <- function(table=NULL, output.fp='', name='name', sep='\t', na='',
                       dec='.', quote=FALSE) {
  write.table(t(c(name, names(table))), file=output.fp, append=FALSE,
              row.names=FALSE, col.names=FALSE, sep=sep, na=na, dec=dec,
              quote=quote)
  write.table(table, file=output.fp, append=TRUE, row.names=TRUE,
              col.names=FALSE, sep=sep, na=na, dec=dec, quote=quote)
}

################
# Reads the "Rfunctions" section of a json file as a json string
# Args:
#  (1) json  Path to the json file
# Outputs:
#  A json string as found in the "Rfunctions" section of the json file
################
ReadJson <- function(file='/../etc/genocrunchlib.json', rel=TRUE) {
  suppressMessages(library('rjson'))

  if (rel) {
    # Consider file as a relative path with respect to this library
    args <- commandArgs(trailingOnly=FALSE)
    dir <- dirname(sub('--file=', '', args[grep('--file=', args)]))

    file <- normalizePath(paste(dir,
                                file,
                                sep=''),
                          mustWork=TRUE)
  }

  if (!file.exists(file)) {
    PrintMsg(paste('"error":"File not found: ',
                   file,
                   '."',
                   sep=''),
                   verbose=TRUE, stop=TRUE) 
  }

  json_str <- fromJSON(file=file)

  return (toJSON(json_str$choices))
}

################################################################################
################################################################################
# MANIPULATION #################################################################
# MANIPULATION #################################################################
# MANIPULATION #################################################################
# MANIPULATION #################################################################
################################################################################
################################################################################

################
# Convert a data frame of factors to a numeric data frame. Unlike data.matrix,
# it will force characters to numeric.
# Args:
#  (1) x  A data frame
# Returns:
#  A numeric version of 'x'
################
ForceNumericDataFrame <- function(x) {
  x <- as.matrix(x)
  for (i in 1:ncol(x)) {
    x[, i] <- as.numeric(as.vector(unlist(x[, i])))
  }
  return (as.data.frame(x))
}

################
# Convert values of a numeric vector to percents of its sum
# Args:
#  (1) x  A numeric vector
# Returns:
#  A percent version of 'x'
################
Numeric2Percent <- function(x){
  return (100*x/sum(x))
}

################
# Scale a numeric vector on a range of values
# Args:
#  (1) x      A numeric vector
#  (2) range  A numeric vector containing 2 elements
#  (3) force  A logical indicating wheter infinite values should be forced into
#             'range'
# Returns:
#  A scaled version of 'x', within 'range'
################
RangeScale <- function(x=NULL, range=c(0, 1), force=TRUE) {
  scaled <- range[1]*(1-((x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x,
            na.rm=TRUE))))+range[2]*((x-min(x, na.rm=TRUE))/(max(x,
            na.rm=TRUE)-min(x, na.rm=TRUE)))
  if (force == TRUE) {
    scaled[scaled == -Inf] <- min(range)
    scaled[scaled == Inf] <- max(range)
  }
  return (scaled)
}

################
# Scale by standard deviation and center on mean
# Args:
#  (1) x      A numeric vector
# Returns:
#  A scaled version of 'x'
################
cScale <- function(x=NULL) {
  sd <- sd(x, na.rm=TRUE)
  mean <- mean(x, na.rm = TRUE)
  if (is.na(sd)) {
    sd <- 1
  } else if (sd == 0) {
    sd <- 1
  }
  if (is.na(mean)) {
    mean <- 0
  }
  return (x/sd-mean)
}

################################################################################
################################################################################
# JSON HANDELING ###############################################################
# JSON HANDELING ###############################################################
# JSON HANDELING ###############################################################
# JSON HANDELING ###############################################################
################################################################################
################################################################################

################
# Build a json string to store figure data
# Args:
#  (1) data       (data-frame)
#  (2) xmetadata  (data-frame) With nrow=ncol(data)
# Returns:
#  A json string
################
Dataframe2DataJson <- function(data=NULL, xmetadata=NULL){

  # Rules:
  #  colname(data) -> [{'name'}]
  #  rowname(data) and data -> [{'data':['rowname(data)':'data']}]
  #  xmetadata -> [{'rowname(xmetadata)'}]

  json <- c(1:ncol(data))
  for (i in 1:ncol(data)) {
    xmetadata.json <- NULL
    if (!is.null(xmetadata)) {
      xmetadata.json <- c(1:ncol(xmetadata))
      for (j in 1:ncol(xmetadata)) {
        xmetadata.json[j] <- paste('"',
                                   names(xmetadata)[j],
                                   '":"',
                                   xmetadata[i, j],
                                   '"',
                                   sep='')
      }
      xmetadata.json <- paste(xmetadata.json, collapse=',')
    }

    data.json.el <- c(1:nrow(data))
    for (j in 1:nrow(data)) {
      data.json.el[j] <- paste('"',
                               row.names(data)[j],
                               '":"',
                               data[j, i],
                               '"',
                               sep='')
    }
    data.json <- paste('{', paste(data.json.el, collapse=','), '}',sep='')

    json[i] <- paste('{',
                     paste(c(paste('"name":"',
                                   names(data)[i],
                                   '"',
                                   sep=''),
                             xmetadata.json,
                             paste('"data":',
                                   data.json,
                                   sep='')),
                           collapse=','),
                     '}',
                     sep='')
  }

  return (paste('[', paste(json, collapse=','), ']', sep=''))
}


################
# Build a json string to store diversity values
# Args:
#  (1) diversity       (list)
#  (2) p.value  (list)
#  (3) map      (matrix)
# Returns:
#  A json string
################
diversity2json <- function(diversity=NULL, p.value=NULL, map=NULL){
    rar <- row.names(diversity)
    nsample <- ncol(diversity)
    sample <- c(1:nsample)
    for (j in 1:nsample) {
      ndata <- nrow(diversity)
      data <- c(1:ndata)
      for (k in 1:ndata) {
        data[k] <- paste('"',
                       rar[k],
                       '":',
                       diversity[k, j],
                       sep='')
      }
      metadata <- c(1:ncol(map))
      for (k in 1:ncol(map)) {
        metadata[k] <- paste('"',
                            names(map[k]),
                            '":"',
                            map[j, k],
                            '"',
                            sep='')
      }
      sample[j] <- paste('{"name":"',
                         row.names(map)[j],
                         '",',
                         paste(metadata, collapse=','),
                         ',"data":{',
                         paste(data, collapse=','),
                         '}}',
                         sep='')
    }
    if (!is.null(p.value)) {
      stat <- c(1:length(p.value))
      for (j in 1:length(p.value)) {
        s <- c(1:length(p.value[[j]]))
        for (k in 1:length(p.value[[j]])) {
          s[k] <- paste('{"name":"',
                         names(p.value[[j]])[k],
                         '", "p-value":"',
                         p.value[[j]][1, k],
                         '"}',
                         sep='')
        }
        stat[j] <- paste(s, collapse=',')
      }
      json <- paste('{"data":[',
                    paste(sample, collapse=','),
                    '],"stat":[',
                    paste(stat, collapse=','),
                    ']}',
                    sep='')
    } else {
      json <- paste('{"data":[',
                    paste(sample, collapse=','),
                    ']}',
                    sep='')
    }
  return (json)
}


################
# Build a json tree from dendrogram
# Args:
#  (1) dendrogram  (list) A dendrogram (nested lists)
# Returns:
#  A json string
################
dendrogram2json <- function(dendrogram=NULL, json=NULL, it=0) {
  start <- FALSE
  if (is.null(json)) {
    start <- TRUE
    json <- paste('{"name":"node',
                  it,
                  '", "children":[',
                  sep='')
  }

  json.vect <- c(1:length(dendrogram))
  for (i in 1:length(dendrogram)) {
    if (length(dendrogram[[i]]) > 1) {
      it <- it+1
      json.vect[i] <- paste('{"name":"node',
                            it,
                            '", "children":[',
                            sep='')
      dendrogram2json.out <- dendrogram2json(dendrogram[[i]], json.vect[i], it)

      json.vect[i] <- dendrogram2json.out[[1]]
      it <- dendrogram2json.out[[2]]
      json.vect[i] <- paste(json.vect[i], ']}', sep='')
    } else {
      json.vect[i] <- paste('{"name":"',
                            dendrogram[[i]],
                            '"}',
                            sep='')

    }
  }

  if (start) {
    json <- paste(json, paste(json.vect, collapse=','), ']}', sep='')
  } else {
    json <- paste(json, paste(json.vect, collapse=','), sep='')
  }

  out <- list()
  out[1] <- json
  out[2] <- it
  return (out)
}

################
# Build a json network from similarity matrix
# Args:
#  (1) mat  list of numeric matrix
# Returns:
#  A json string
################
buildFusionNetworkJson <- function(mat=NULL, map=NULL){
  names <- colnames(mat[[1]])
  ncol <- ncol(mat[[1]])
  if (!is.null(map)) {
    ncol.map <- ncol(map)
  }
  nodes <- c(1:ncol)
  for (i in 1:ncol){
    if (!is.null(map)) {
      map.row <- c(1:ncol.map)
      for (j in 1:ncol.map) {
        map.row[j] <- paste('"',
                            names(map)[j],
                            '":"',
                            map[i, j],
                            '"',
                            sep='')
      }
    } else {
      map.row <- '"none":"none"'
    }
    nodes[i] <- paste('{"id":',
                      i-1,
                      ',"name":"',
                      names[i],
                      '",',
                      paste(map.row, collapse=','),
                      '}',
                      sep='')
  }
  links <- c(1:(ncol*(ncol/2-1)))
  k <- 1
  for (i in 1:(ncol-1)) {
    for (j in (i+1):ncol) {
      weights <- c(1:length(mat))
      for (l in 1:length(mat)) {
        weights[l] <- mat[[l]][i,j] 
      } 
      links[k] <- paste('{"source":',
                        i-1,
                        ',"target":',
                        j-1,
                        ',"weight":[',
                        paste(weights,collapse = ","),
                        ']}',
                        sep='')
      k <- k+1
    }
  }
  return(paste('{"nodes":[', paste(nodes, collapse = ","),'],"links":[', paste(links, collapse = ","), ']}', sep=''))
}

################################################################################
################################################################################
# MODIFICATION #################################################################
# MODIFICATION #################################################################
# MODIFICATION #################################################################
# MODIFICATION #################################################################
################################################################################
################################################################################

################
# Sort a data-frame columns according to a column of another
# Args:
#  (1) table   (matrix-like object) Table to be sorted
#  (2) map     (matrix-like object) Map to extract the sorting column from
#  (3) column  (numeric) Index of the column in map to use for sorting
#  (4) verbose (bool) Print messages? see PrintMsg() options
# Returns:
#  A sorted version of table
# Print to stdout:
# A description
################
SortTable <- function(table=NULL, map=NULL, verbose=TRUE) {
#  print(row.names(map))
#  print(map)
#  print(map[, column])
  PrintMsg('"description":"The table columns were sorted according to the map."', verbose)
  return(as.data.frame(table[, as.vector(row.names(map))]))
}

################
# Transpose a data frame (or a matrix-like object)
# Args:
#  (1) x  (matrix-like object)
# Returns:
#  A transposed version of 'x' (numeric dataframe)
################
Transpose <- function(x=NULL) {
  if (is.data.frame(x)) {
    names <- names(x)
    row.names <- row.names(x)
    x <- as.data.frame(t(x))
    names(x) <- row.names
    row.names(x) <- names
  } else {
    x <- t(x)
  }
  return (x)
}

################
# Filter numeric table (with category-specific options for OTU-type tables) 
# Args:
#  (1) table                (numeric dataframe) Count table
#  (2) category             (character) Name of the (optional) category column
#  (3) abundance_threshold  (numeric) Min % abundance per column [0,100]
#  (4) presence_threshold   (numeric) Min % presence per row [0,100]
#  (5) rm_unassigned        (logical) Remove unassigned category ?
#  (6) verbose              (character) Print messages? see PrintMsg() options
# Returns:
#  An filtered version of 'table' (numeric dataframe)
# Prints to stdout:
#  A description of the function
################
FilterTable <- function(table=NULL, category='taxonomy',
                        abundance_threshold=0.03,
                        abundance_threshold_type='percent',
                        presence_threshold=1,
                        presence_threshold_type='int',
                        verbose=TRUE) {

  nrow.init <- nrow(table)

  # First filter based on minimal abundance
  msg <- paste('"description":"Removed rows not exceeding ', abundance_threshold, sep='')
  filter <- table[, names(table) != category]
  minmax.a <- paste(min(filter), max(filter), sep='-')
  if (abundance_threshold_type == 'percent') {
    filter <- as.data.frame(apply(filter, 2, Numeric2Percent))
    msg <- paste(msg, '%', sep='')
    minmax.a <- paste(min(filter), '-', max(filter), '%', sep='')
  }
  max.abundance <- apply(filter, 1, max)
  table.filtered <- table[max.abundance >= abundance_threshold, ]
  nrow.filtered.a <- nrow(table.filtered)
  msg <- paste(msg,
               ' in at least one column: ',
               nrow.init-nrow.filtered.a,
               ' rows were removed out of ',
               nrow.init,
               '. Then, removed rows with presence (number of values not equal to zero) lower than ',
               presence_threshold,
               sep='')

  # Then filter based on minimal presence
  filter <- table.filtered[, names(table.filtered) != category]

  presence <- rowSums(filter != 0)
  minmax.p <- paste(min(presence), max(presence), sep='-')
  if (presence_threshold_type == 'percent') {
    presence <- Numeric2Percent(presence)
    msg <- paste(msg, '%', sep='')
    minmax.p <- paste(min(presence), '-', max(presence), '%', sep='')
  }
  table.filtered <- table.filtered[presence >= presence_threshold, ]
  nrow.filtered.p <- nrow(table.filtered)

  nstop <- nrow(table.filtered)
  ab.min <- round(min(filter), digits=4)
  ab.max <- round(max.abundance, digits=4)
  msg <- paste(msg,
               ': ',
               nrow.filtered.a-nrow.filtered.p,
               ' rows were removed out of ',
               nrow.filtered.a,
               ' (final number of rows = ',
               nrow.filtered.p,
               '). Min-max abundance found was ',
               minmax.a,
               '. Min-max presence found was ',
               minmax.p,
               '."',
               sep='')

  PrintMsg(msg, verbose)

  if (nrow.filtered.p == 0) {
    PrintMsg(paste('"error":"All the rows were filtered out. Please chose an abundance threshold within ', minmax.a, ' and a presence threshold within ', minmax.p, '."', sep=''), verbose, TRUE)
  }

  return (table.filtered)
}

################
# Bin numeric table by category
# Args:
#  (1) table     (numeric dataframe) Count table
#  (2) aggr.col  (character) Name of the column in 'table' to consider for data
#                aggregation
#  (3) fun       (character) Function to use for data aggregation. Valid
#                choices: 'mean', 'sum'
#  (4) verbose   (character) Print messages? see PrintMsg() options
# Returns:
#  An aggregated version of 'table' (numeric dataframe)
# Prints to stdout:
#  A description of the function
################
BinTableByCategory <- function(table=NULL, category_col='taxonomy', fun='sum',
                               level='NA', verbose=TRUE, vstyle=1) {

  if ((category_col == '') || !(category_col %in% names(table)) ) {
    category_col <- "row names"
    category <- row.names(table)
  } else {
    category <- paste(table[, names(table) == category_col],
                      '; (',
                      row.names(table),
                      ')',
                      sep='')
    table <- table[, names(table) != category_col]
  }
  category <- strsplit(category, ';')


  if (vstyle == 1) {
    PrintMsg(paste('"description":"Observations were binned by categories (category column:',
                   category_col,
                   ', function:',
                   fun,
                   ', level:',
                   level,
                   ')."',
                   sep=''), verbose)
  } else if (vstyle == 2) {
    PrintMsg(paste('"description":"Observations were binned by categories (category column:',
                 category_col,
                 ', function:',
                 fun,
                 '):"',
                 sep=''), verbose)
  }
  if (vstyle > 1) {
    PrintMsg(paste('"description-item":"Category level ',
                 level,
                 '"',
                 sep=''), verbose)
  }


  for (i in 1:length(category)) {
    category[[i]] <- gsub('^ *.__ *$', '', category[[i]])
    category[[i]] <- gsub('^ *', '', category[[i]])
    category[[i]] <- gsub(' *$', '', category[[i]])
    category[[i]] <- category[[i]][! category[[i]] %in% c('', ' ')]
    if (length(category[[i]]) >= level) {
      category[[i]] <- paste(category[[i]][1:level], collapse='; ')
    } else {
      category[[i]] <- paste(category[[i]], collapse='; ')
    }
  }
  category <- list(unlist(category))
  name <- 'category'
  while (name %in% names(table)) {
    name <- paste(name,paste(sample(c(0:9, letters, LETTERS), 3), collapse=''),sep='_')
  }
  names(category) <- name

  table.binned <- aggregate(table,
                              by=category, FUN=fun)

  row.names(table.binned) <- table.binned[, name]
  table.binned <- table.binned[, names(table.binned) != name]

  return(table.binned)
}

################
# Divide a data frame (or a matrix-like object) by a vector
# Args:
#  (1) table    (numeric dataframe) Count table
#  (2) vect     (vector) Vector to divide 'table'
#  (3) verbose  (character) Print messages? see PrintMsg() options
# Returns:
#  A divided (per row) version of 'table' (numeric dataframe)
# Prints to stdout:
#  A description of the function
################
DivTable <- function(table=NULL, vect=NULL, verbose='TRUE') {
  PrintMsg('log', 'Divide tables rows by a vector.', verbose=verbose)

  return (as.matrix(table) / unlist(vect))
}

################
# Transform to log2 count per milion
# Args:
#  (1) table    (numeric dataframe) Count table
#  (2) verbose  (character) Print messages? see PrintMsg() options
# Returns:
#  A transformed version of 'table' (numeric dataframe)
# Prints to stdout:
#  A description of the function
################
ApplyLog2Cpm <- function(table=NULL, verbose='TRUE') {
  if (length(table[table < 0]) > 0) {
    PrintMsg('"error":"The log2cpm transformation was not applied because negative values were detected."',
             verbose, TRUE)
    return ()
  }
  if (length(table[table%%1 > 0]) > 0) {
    PrintMsg('"warning":"Non-integer values were detected."', verbose)
    table <- round(table, digits=0)
  }

  PrintMsg('"description":"Values were converted into per-million per column and a log-transformation was applied (log2(x+1))."',
           verbose)

  transformed.table <- as.data.frame(log2(1+(apply(as.matrix(table),
                                                   2,
                                                   Numeric2Percent))*10000))
  names(transformed.table) <- names(table)
  row.names(transformed.table) <- row.names(table)

  return (transformed.table)
}

################
# Transform to log2
# Args:
#  (1) table    (numeric dataframe) Count table
#  (2) verbose  (character) Print messages? see PrintMsg() options
# Returns:
#  A transformed version of 'table' (numeric dataframe)
# Prints to stdout:
#  A description of the function
################
ApplyLog2 <- function(table=NULL, verbose=TRUE) {
  if (length(table[table < 0]) > 0) {
    PrintMsg('"error":"log2(x+1) was not applied because negative values were detected."',
             verbose, TRUE)
    return ()
  }
  PrintMsg('"description":"A log-transformation was applied (log2(x+1))."',
           verbose)

  transformed.table <- as.data.frame(log2(1+(as.matrix(table))))
  names(transformed.table) <- names(table)
  row.names(transformed.table) <- row.names(table)

  return (transformed.table)
}

################
# Transform to percent (per column)
# Args:
#  (1) table    (numeric dataframe) Count table
#  (2) verbose    (character) Print messages? see PrintMsg() options
# Returns:
#  A transformed version of 'table' (numeric dataframe)
# Prints to stdout:
#  A description of the function
################
ApplyCount2Percent <- function(table, verbose=TRUE) {
  PrintMsg('"description":"Values were converted into percentages per column."', verbose=verbose)
  transformed.table <- as.data.frame(apply(as.matrix(table), 2, Numeric2Percent))
  names(transformed.table) <- names(table)
  row.names(transformed.table) <- row.names(table)
  return (transformed.table)
}

################
# Apply rarefaction
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) sample     (positive integer or 'max') Sampling depth
#  (3) nsampling  (positive integer) Number of repeated sampling to performe
#  (4) verbose    (character) Print messages? see PrintMsg() options
# Returns:
#  A rarefied version of 'table' (a numeric dataframe)
# Prints to stdout:
#  A description of the function
################
RarefyTable <- function(table=NULL, sample='max', nsampling=1, verbose=TRUE) {
  suppressMessages(library('vegan'))

  # Round counts if not integers
  if (length(table[table < 0]) > 0) {
    PrintMsg('"error":"Rarefaction is not applicable because negative values were detected."', verbose, TRUE)
    return ()
  }
  if (length(table[table%%1 > 0]) > 0) {
    PrintMsg('"warning":"Non-integer values were detected. In order to permit rarefaction, these values were rounded to the nearest integer."', verbose)
    table <- round(table, digits=0)
  }

  # Define/check rarefaction depth
  min.sample <- min(colSums(table))
  if (sample == 'max' || as.numeric(sample) > min.sample) {
    sample <- min.sample
  }

  if (sample < 100) {
    PrintMsg(paste('"warning":"Very low counts detected in the following sample(s): ',
                        paste(names(table)[which(colSums(table) < 100)], collapse=','),
                        '."',
                        sep=''), verbose)
  } else if (sample < 1000) {
    PrintMsg(paste('"warning":"Low counts detected in the following sample(s): ',
                        paste(names(table)[which(colSums(table) < 1000)], collapse=','),
                        '."',
                        sep=''), verbose)
  }

  # Apply rarefaction
  table <- t(table)
  table.list <- list()
  for (i in 1:as.numeric(nsampling)) {
    table.list[[i]] <- rrarefy(table, sample=sample)
  }
  rarefied.table <- apply(simplify2array(table.list), c(1, 2), mean)
  # element-wise mean across rarefied tables

  PrintMsg(paste('"description":"The number of observations per samples were equalized using the rarefaction method with a sampling depth of ',
                        sample,
                        ' (random sampling without replacement) (R package vegan). The rarefied values were calculated as the means of ',
                        nsampling,
                        ' independent samplings that were rounded to the nearest integer."',
                        sep=''), verbose=verbose)

  return (data.frame(t(round(rarefied.table, digits=0))))
}


################
# Args:
#  (1) table    (numeric dataframe) Count table
#  (2) map      (dataframe) Design
#  (3) effect   (character) comma-delimited list of random effects to
#               suppress. Elements must be names of columns in mapping file
#  (4) verbose  (character) Print messages? see PrintMsg() options
# Returns:
#  A adjusted version of 'table' (a numeric dataframe)
# Prints to stdout:
#  A description of the function
################
SuppressBatchEffect <- function(table=NULL, map=NULL, effect=NULL,
                                fun='combat', verbose=TRUE) {
  method <- list(combat='ComBat', mean_centering='mean centering')
  PrintMsg(paste('"description":"Batch effect suppression on ',
                 effect,
                 ' using',
                 method[[fun]],
                 '"'),
           verbose)

  effect <- unlist(strsplit(effect, split=','))

  if (fun == 'mean_centering') {
    table <- t(table)

    for (i in 1:length(effect)) {
      fact <- map[effect[i]]
      fact.name <- as.vector(unique(unlist(fact)))
      nfact <- length(fact.name)

      for (j in 1:ncol(table)) {
        data <- data.frame(d=table[, j], fact=fact)
        means <- NULL
        for (k in 1:nfact) {
          means <- append(means, mean(data[fact == fact.name[k], 'd']))
        }
        for (k in 1:nfact) {
          adj <- max(means)-means[k]
          data[fact == fact.name[k], 'd'] <- data[fact == fact.name[k], 'd']+adj
        }
        table[, j] <- data$d
      }
    }
    table <- t(table)
  } else if (fun == 'combat') {
    suppressMessages(library('sva'))
    table <- ComBat(table, map[, effect])
  }
  return (table)
}


################
# Filter mapping file to keep only terms in model
# Args:
#  (1) map             (dataframe)
#  (2) model           (formula vector)
# Returns:
#  A filtered map
################
FilterMap <- function(map=NULL, model=NULL) {

  # keep only columns of map that are used in the model
  if (!is.null(model)) {
    term <- list()
    for (i in 1:length(model)) {
      term[[i]] <-as.vector(unique(unlist(strsplit(model[i], split=c('[^A-Za-z0-9_]')))))
      term[[i]] <- term[[i]][! term[[i]] %in% c("", " ", "1", "Error")]
    }
    term <- as.vector(unlist(term))
    row.names <- row.names(map)
    if (length(term) > 1) {
      map <- as.data.frame(map[, names(map) %in% term])
    } else {
      map <- as.data.frame(map[, names(map) == term])
      names(map) <- term
    }
    if (ncol(map) < length(unique(term))) {
      PrintMsg(paste('"error":"Some terms in model ',
               model,
               ' were not found in the map. Please make sure that all model terms refer to columns headers in the map."',
               sep=''),
               verbose=TRUE, TRUE)
    }
  }

  return (map)
}

################
# Remove constant rows from table
# Args:
#  (1) table             (dataframe)
# Returns:
#  A filtered table
################
RemoveConstantRows <- function(table=NULL, verbose=TRUE) {

  filter <- rep(TRUE, nrow(table))
  for (i in 1:nrow(table)) {
    if (length(as.vector(unique(unlist(table[i, ])))) < 2) {
      filter[i] <- FALSE
      PrintMsg(paste('"warning":"Row ', i, ' (', row.names(table)[i], ') contained essentially constant data and was removed."', sep=''),
               verbose=verbose)
    }
  }
  return (table[filter == TRUE, ])
}

################################################################################
################################################################################
# ANALYSIS #####################################################################
# ANALYSIS #####################################################################
# ANALYSIS #####################################################################
# ANALYSIS #####################################################################
################################################################################
################################################################################

################
# Apply a clustering algorithm
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) fun        (character) Clustering function (algorithme)
#  (3) json       (character) Json string specifying packages  
#  (4) verbose    (character) Print messages? see PrintMsg() options
#  (5) graphical  (logical) Generate graphics?
# Returns:
#  A table of clusters
# Output to device:
#  Depending on 'fun', a figure
# Prints to stdout:
#  A description of the function
################
PerformClustering <- function(table=NULL, fun=c('kmeans', 'pam', 'pam-bray', 'pam-jaccard'), json=libJson,
                              verbose=TRUE, graphical=TRUE) {

  if (ncol(table) < 10 || nrow(table) < 3) {
    PrintMsg('"warning":"Not enough data to perform clustering (min 10 column and 3 rows)."',
             verbose)
    return (NULL)
  } else if (ncol(table) > 65536) {
    PrintMsg('"warning":"Sorry, the dataset is too large for clustering algorithms (max 65536 columns)."',
             verbose)
    return (NULL)
  }
  spec <- as.data.frame(do.call('rbind', json$clustering))
  spec <- spec[spec$value == fun, ]
  package <- ''
  pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
  if (length(pkg) > 0) {
    for (i in 1:length(pkg)) {
      suppressMessages(library(pkg[i], character.only=TRUE))
    }
    package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
  }
  PrintMsg(paste('"description":"Clusters were determined using the ', spec$label,
           package, '."', sep=''),
           verbose)

  table <- t(table)

  if (fun == 'kmeans') {


    if (graphical == TRUE) {
      kmean.out <- kmeansruns(table, criterion='asw', plot=F)$cluster  # figure margins too large issue...
    } else {
      kmean.out <- kmeansruns(table, criterion='asw')$cluster
    }
    cluster <- paste('kmean',
                     kmean.out,
                     sep ='')

  } else if (fun == 'pam') {
    pam.out <- pamk(table, usepam=TRUE)
    if (graphical == TRUE) {
      plot(pam.out$pamobject, which.plots=2)
    }
    cluster <- paste('pam',
                     pam.out$pamobject$cluster,
                     sep ='')
  } else if (fun == 'pam-bray') {

    dist.data <- ComputeDistance(table=table, fun='bray', json=json,
                     verbose=verbose)

    pam.out <- pamk(dist.data, usepam=TRUE)
    if (graphical == TRUE) {
      plot(pam.out$pamobject, which.plots=2)
    }

    cluster <- paste('pam',
                     pam.out$pamobject$cluster,
                     sep ='')

  } else if (fun == 'pam-jaccard') {

    dist.data <- ComputeDistance(table=table, fun='jaccard', json=json,
                     verbose=verbose)

    pam.out <- pamk(dist.data, usepam=TRUE)
    if (graphical == TRUE) {
      plot(pam.out$pamobject, which.plots=2)
    }

    cluster <- paste('pam',
                     pam.out$pamobject$cluster,
                     sep ='')
  } else {
    return(NULL)
  }

  cluster <- as.data.frame(cluster)
  row.names(cluster) <- row.names(table)
  names(cluster) <- 'Clustering'
  PrintMsg(paste('"description":"Clusters: ',
           paste(paste(row.names(cluster), unlist(cluster), sep=':'), collapse=', '),
           '."', sep=''),
           verbose)
  return (cluster)
}

################
# Build a relative abundance stacked barplot
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) threshold  (numeric) Abundance cutoff for display in percent [0;100]
#  (3) verbose    (character) Print messages? see PrintMsg() options
#  (4) graphical  (logical) Generate graphics?
# Returns:
#  Figure data as a json string
# Output to device:
#  A figure
# Prints to stdout:
#  A description of the function
################
AnalyseProportions <- function(table=NULL,
                               map=NULL,
                               verbose=TRUE, graphical=TRUE) {

  # Convert to relative abundances per column
  PrintMsg('"description":"Data was converted to proportions (%) per sample."',
           verbose)
  if (min(table) < 0) {
    PrintMsg('"warning":"Negative values were found! Negative values are not supported by this analysis."',
             verbose)
    table <- as.data.frame(abs(table))
  }
  table <- as.data.frame(apply(table, 2, Numeric2Percent))

  # Order by decreasing values
  table <- as.data.frame(table[order(rowMeans(table), decreasing=TRUE), ])

  out <- list()
  out[['txt']] <- table
  out[['json']] <- Dataframe2DataJson(data=table, xmetadata=map)

  # Build stacked barchart
  if (as.logical(graphical) == TRUE) {
    col <- rainbow(nrow(table))
    par(mar=c(4, 4, 2, 4), xpd=TRUE)
    barplot(as.matrix(table),
            ylim=c(0, max(colSums(table))),
            space=0,
            border=NA,
            names.arg=names(table),
            ylab='%',
            col=col,
            las=2,
            bty="n",
            cex.names=1.84906*ncol(table)^-0.2898)
    # Build legend in a separate figure
    plot.new()
    legend("topleft", row.names(table), cex=0.8,
           fill=col, horiz=FALSE)
  }

  return (out)
}

################
# Calculate diversity per column
# Args:
#  (1) table    (numeric dataframe) Count table
#  (2) fun      (character) The diversity function to use
#  (3) json     (character) Json string specifying packages
#  (4) verbose  (character) Print messages? see PrintMsg() options
# Returns:
#  A vector of length=ncol(table)
# Prints to stdout:
#  A description of the function
################
ComputeDiversity <- function(table=NULL, fun='shannon', json=libJson,
                             verbose=TRUE) {

  spec <- as.data.frame(do.call('rbind', json$diversity))
  spec <- spec[spec$value == fun, ]
  package <- ''
  pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
  if (length(pkg) > 0) {
    for (i in 1:length(pkg)) {
      suppressMessages(library(pkg[i], character.only=TRUE))
    }
    package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
  } else {
    pkg <- ''
  }

  PrintMsg(paste('"description":"The ',
                 spec$label,
                 package,
                 ' was calculated for each sample."', sep=''), verbose)

  diversity <- NULL
  if (pkg == 'vegan') {
    for (i in 1:ncol(table)) {
      diversity <- append(diversity, diversity(table[, i], index=fun, MARGIN=0,
                          base=2), after=length(diversity))
    }
  } else if (pkg == 'ineq') {
    for (i in 1:ncol(table)) {
      diversity <- append(diversity, ineq(table[, i], parameter=NULL, type=fun,
                          na.rm=TRUE), after=length(diversity))
    }
  } else if (fun == 'richness') {
    for (i in 1:ncol(table)) {
      diversity <- append(diversity, sum(table[, i] != 0),
                          after=length(diversity))
    }
  } else if (fun == 'chao1') {
    diversity <- chao1(table, taxa.row = TRUE)
  } else {
    PrintMsg(paste('"error":"Unknown argument to fun (2):', fun, '"', sep=' '),
             verbose, TRUE)
  }

  return (diversity)
}

################
# Compute correlations
# Args:
#  (1) table    (numeric dataframe) Count table
#  (2) fun      (character) The correlation methode to use
#  (3) json     (character) Json string specifying packages
#  (4) verbose  (character) Print messages? see PrintMsg() options
# Returns:
#  A vector of length=ncol(table)
# Prints to stdout:
#  A description of the function
################
ComputeCorrelation <- function(table=NULL, fun='pearson', test=FALSE,
                               json=libJson, verbose=TRUE) {

  spec <- as.data.frame(do.call('rbind', json$correlation))
  spec <- spec[spec$value == fun, ]
  package <- ''
  pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
  if (length(pkg) > 0) {
    for (i in 1:length(pkg)) {
      suppressMessages(library(pkg[i], character.only=TRUE))
    }
    package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
  }

  PrintMsg(paste('"description":"Calculated ', spec$label, package, '."', sep=''),
           verbose=verbose)

  if (as.logical(test) == TRUE) {
    cor.output <- list()
    cor.output[['estimate']] <- matrix(ncol=nrow(table), nrow=nrow(table))
    cor.output[['p.value']] <- matrix(ncol=nrow(table), nrow=nrow(table))
    for (i in 2:nrow(table)) {
      for (j in 1:(i-1)) {
        out <- cor.test(unlist(table[i, ]), unlist(table[j, ]), method=fun)
        cor.output[['estimate']][i, j] <- out$estimate
        cor.output[['p.value']][i, j] <- out$p.value
      }
    }
    cor.output[['estimate']][upper.tri(cor.output[['estimate']])] <- t(cor.output[['estimate']])[upper.tri(cor.output[['estimate']])]
    diag(cor.output[['estimate']]) <- 1
    cor.output[['p.value']][upper.tri(cor.output[['p.value']])] <- t(cor.output[['p.value']])[upper.tri(cor.output[['p.value']])]
    diag(cor.output[['p.value']]) <- 0
    return (cor.output)
  }

  if (fun == 'pearson') {
    return (cor(t(table), method=fun, use='pairwise.complete.obs'))
  } else {
    return (cor(t(table), method=fun, use='na.or.complete'))
  }
}

################
# Compute distances
# Args:
#  (1) table    (numeric dataframe) Count table
#  (2) fun      (character) The distance function to use
#  (3) json     (character) Json string specifying packages
#  (4) verbose  (character) Print messages? see PrintMsg() options
# Returns:
#  A vector of length=ncol(table)
# Prints to stdout:
#  A description of the function
################
ComputeDistance <- function(table=NULL, fun='bray', json=libJson,
                            verbose=TRUE) {

  spec <- as.data.frame(do.call('rbind', json$distance))
  spec <- spec[spec$value == fun, ]
  package <- ''
  pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
  if (length(pkg) > 0) {
    for (i in 1:length(pkg)) {
      suppressMessages(library(pkg[i], character.only=TRUE))
    }
    package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
  }


  if (fun %in% c('pearson', 'spearman')) {

    PrintMsg(paste('"description":"A distance matrix was calculated based on ',
                          spec$label, package, '."', sep=''), verbose=verbose)
    return (as.dist(as.matrix(ComputeCorrelation(table, fun=fun, json=json,
                                         verbose=FALSE))))

  } else {
    # Correct for negative entries by adding a scalar (Caillez correction method)
    if (min(table) < 0) {

      table <- table+abs(min(table))

      PrintMsg(paste('"description":"A Caillez correction was applied and a distance matrix was calculated based on ',
               spec$label, package, '."', sep=''), verbose=verbose)

    } else {
      PrintMsg(paste('"description":"A distance matrix was calculated based on ',
                            spec$label, package, '."', sep=''), verbose=verbose)
    }

    if ('vegan' %in% pkg) {
      return (vegdist(table, method=fun, binary=FALSE, diag=TRUE, upper=TRUE))
    }
  }

}

################
# Test if model is balanced
# Args:
#  (1) map       (data-frame) Experimental design table
#  (2) model     (character) A formula representing the model for stats. Terms
#                must refer to names(map)
#  (3) verbose   (character) Print messages? see PrintMsg() options
# Returns:
#  A bool indicating whether the design is balanced or not
################
ModelIsBalanced <- function(map=NULL, model=NULL, verbose='TRUE') {
  # keep only columns of map that are used in the model
  map <- FilterMap(map=map, model=model)
  for (i in 1:ncol(map)) {
    fact <- as.vector(unique(unlist(map[, i])))
    n <- length(map[map[, i] == fact[1], i])
    if (length(fact) > 1) {
      for (j in 2:length(fact)) {
        if (length(map[map[, i] == fact[j], i]) != n) {
          PrintMsg('log', paste('Model ',model , ' is not balanced.', sep=''), verbose=verbose)
          return (FALSE)
        }
      }
    }
  }

  PrintMsg('note', paste('Model ',model , ' is balanced.', sep=''), verbose=verbose)
  return (TRUE)
}

################
# Test if model's variables could be nested
# Args:
#  (1) map       (data-frame) Experimental design table
#  (2) model     (character) A formula representing the model for stats. Terms
#                must refer to names(map)
#  (3) verbose   (character) Print messages? see PrintMsg() options
# Returns:
#  A list of nested variables
################
ModelIsNested <- function(map=NULL, model=NULL, verbose='TRUE') {

  # keep only columns of map that are used in the model
  map <- FilterMap(map=map, model=model)
  if (ncol(map) < 2) {
    return (NULL)
  }

  out <- list()
  out.txt <- NULL
  for (i in 1:ncol(map)) {
    fact <- as.vector(unique(unlist(map[, i])))
    test <- as.data.frame(map[, -i])
    names(test) <- names(map)[-i]
    out[[names(map)[i]]] <- NULL
    for (j in 1:ncol(test)) {
      group1 <- as.vector(test[map[, i] == fact[1], j])
      nested <- TRUE
      if (length(as.vector(unique(map[, i]))) >= length(as.vector(unique(test[, j])))) {
        nested <- FALSE
      } else if (length(as.vector(unique(group1))) != length(group1)) {
        nested <- FALSE
      } else {
        for (k in 2:length(fact)) {
          if (!identical(group1, as.vector(test[map[, i] == fact[k], j]))) {
            nested <- FALSE
            break
          }
        }
      }
      if (nested == TRUE) {
        out[[names(map)[i]]] <- append(out[[names(map)[i]]], names(test)[j], after=length(out[[names(map)[i]]]))
        out.txt <- append(out.txt, paste(names(test)[i], names(map)[j], sep='/'), after=length(out.txt))
      }
    }
  }
  if (length(out) == 0) {
    return (NULL)
  }
  PrintMsg('log', paste('The following nesting was detected in model ',
           model,
           ': ',
           paste(out.txt, collapse=', '),
           ' Please control that the model is the one you want.',
           sep=''), verbose=verbose)
  return (out)
}

################
# Compute statistics
# Args:
#  (1) response  (numeric vector)
#  (2) map       (data-frame) Experimental design table with
#                nrow=length(response)
#  (3) method    (character) Statistics to use
#  (4) model     (character) A formula representing the model for stats. Terms
#                must refer to names(map)
#  (5) pairwise  (bool) Perform pairwise comparision?
#  (6) verbose   (character) Print messages? see PrintMsg() options
# Returns:
#  A data-frame
# Prints to stdout:
#  A description of the function
################
ComputeStats <- function(response=NULL, map=NULL, method='anova',
                         model=NULL, pairwise=TRUE, verbose=TRUE) {

  if (is.null(method)) {
    method <- 'anova'
    PrintMsg('"warning":"The statistical method was not specified. An ANOVA will be performed by default."', verbose)
  }

  # If no model is specified, assume all columns in map are fixed variables
  if (is.null(model)) {
    model <- paste(names(map), collapse='+')
    PrintMsg(paste('"warning":"No model was specified. Based on the map, the following model will be used by default: ',
            model,
            '."',
            sep=''), verbose)
  }
  # Test that each factor has >= 2 levels
  term <- as.vector(unique(unlist(strsplit(model, split=c('[^A-Za-z0-9_]')))))
  term <- term[! term %in% c("", " ", "1", "Error")]
  for (i in 1:length(term)) {
    if (!term[i] %in% names(map)) {
      PrintMsg(paste('"error":"The following model factor was not found in map: ',
              term[i],
              '. Please correct the model or correct the map."',
              sep=''), verbose=TRUE, stop=TRUE)
    }
    levels <- as.vector(unique(unlist(map[, term[i]])))
    if (length(levels) < 2) {
      PrintMsg(paste('"error":"The following model factor has less than 2 levels: ',
              term[i],
              '. Please correct the model or correct the map."',
              sep=''), verbose=TRUE, stop=TRUE)
    }
    for (j in 1:length(levels)) {
      if (length(map[map[, term[i]] == levels[j], ]) == 1) {
        PrintMsg(paste('"warning":"The following group contains only one element (variance is null): ',
                levels[j],
                '. This will impact on statistics."',
                sep=''), TRUE)
      }
    }
  }

  response.name <- 'response'
  while (response.name %in% names(map)) {
    response.name <- paste('response', paste(sample(c(letters, LETTERS, c(0:9)), 4), collapse=''), sep='')
  }
  names(response) <- response.name
  formula <- paste(response.name, ' ~ ', model, sep='')

  # Build the dataframe
  data <- map
  data <- cbind(map, response)

  pairwise.output <- NULL
  if (method == 'anova' || is.null(method)) {
    # PARAMETRIC, ACCEPTS MODELS WITH >2 FACTORS, MULTIPLE FIXED VARIABLES AND NESTING
    # MODEL CAN BE OF THE FORM "a*b+c"

    PrintMsg(paste('"description":"An analysis of variance (ANOVA) was performed using the model: ', model,
             '."', sep=''), verbose)

    aov.output <- aov(formula=as.formula(formula), data=data)

    if (pairwise == TRUE) {
      pairwise.output <- TukeyHSD(aov.output)
    }

    summary <- as.data.frame(summary(aov.output)[[1]])
    if ('Pr(>F)' %in% names(summary)) {
      p.value <- summary[, 'Pr(>F)']
    } else {
      PrintMsg(paste('"warning":"Impossible to calculate a p-value. Please make sure that the model ',
               model, ' is not saturated."',
               sep=''), verbose)
      p.value <- rep('NA', nrow(summary))
    }

    summary <- data.frame(row.names=gsub('[[:space:]]', '', row.names(summary)),
                          p.value=p.value)
                     #explained=summary[, 'Sum Sq']/sum(summary[, 'Sum Sq']))

  } else if (method == 'friedman') {
    # NON-PARAMETRIC, ACCEPTS MODELS WITH >2 FACTORS BUT REQUIRES EXACTLY 2 NESTED VARIABLE
    # MODEL HAS TO BE OF THE FORM "a | b"
    friedman.terms <- unlist(strsplit(model, split=' '))
    if (length(friedman.terms) != 3 || friedman.terms[2] != '|') {
      PrintMsg(paste('"error":"The model must be of the form a | b when using Friedman test. Please provide an appropriate model. The current model is: ',
               model, '."', sep=''), verbose, TRUE)
    }

    friedman.output <- friedman.test.with.post.hoc(formula=as.formula(formula), data=data)

    PrintMsg(paste('"description":"A Friedman test was performed using the model: ', model,
                   '."', sep=''), verbose)

    if (pairwise == TRUE) {
      pairwise.output <- list()
      pairwise.output[[term[1]]] <- data.frame(row.names=paste(unique(unlist(map[, term[1]])), collapse='-'),
                                              p=friedman.output$PostHoc.Test)
      PrintMsg('"description":"A post-hoc test with correction for multiple comparisions was performed."', verbose)
    }

    summary <- data.frame(row.names=term[1],
                          p.value=friedman.output$Friedman.Test)

  } else if (method == 'kruskal') {
    # NON-PARAMETRIC, ACCEPTS MODELS WITH >2 FACTORS BUT ONLY 1 FIXED VARIABLE
    # MODEL CAN BE OF THE FORM "a"
    if (length(term) > 1) {
      PrintMsg(paste('"error":"The model cannot include more than 1 variable when using Kruskal-Wallis rank sum test. Please provide an appropriate model. The current model is: ',
               model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE)
    }

    suppressMessages(library('dunn.test'))
    dunn.output <- dunn.test(data[, response.name], g=map[, term[1]], kw=FALSE, table=FALSE, list=FALSE)

    PrintMsg(paste('"description":"A Kruskal-Wallis rank sum test was performed using the model: ', model,
                   '."', sep=''), verbose)

    if (pairwise == TRUE) {  
      pairwise.output <- list()
      pairwise.output[[term[1]]] <- data.frame(row.names=paste(unique(unlist(map[, term[1]])), collapse='-'),
                                              p=dunn.output$P)
      PrintMsg('"description":"A post-hoc test with correction for multiple comparisions (R library {dunn.test}) was performed."', verbose)
    }

    summary <- data.frame(row.names=term[1],
                          p.value=pchisq(dunn.output$chi2, nrow(data)-1, lower.tail=FALSE))


  } else if (method == 'wilcox') {
    # NON-PARAMETRIC, ACCEPTS MODELS WITH 2 FACTORS ONLY AND ONE FIXED VARIABLES
    # MODEL HAS TO BE OF THE FORM "a"
    if (length(term) > 1) {
      PrintMsg(paste('"error":"The model cannot include more than 1 variable when using Wilcoxon rank sum test. Please provide an appropriate model. The current model is: ',
               model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE)
    }
    samples <- unique(unlist(data[, model]))
    if (length(samples) != 2) {
      PrintMsg(paste('"error":"The Wilcoxon rank sum test can only perform two samples comparison. Please provide an appropriate model. The current model includes the following samples: ',
               sample, '."', sep=''), verbose, TRUE)
    }

    wilcox.output <- wilcox.test(formula=as.formula(formula), data=data)
    PrintMsg(paste('"description":"A Wilcoxon rank sum test was performed using the model: ', model,
                   '."', sep=''), verbose)

#    summary <- data.frame(row.names=paste(samples, collapse='-'),
    summary <- data.frame(row.names=model,
                          p.value=wilcox.output$p.value)

    pairwise.output <- list()
    pairwise.output[[model]] <- as.data.frame(wilcox.output$p.value)
    names(pairwise.output[[model]]) <- model
    row.names(pairwise.output[[model]]) <-paste(samples, collapse='-')

  } else if (method == 'wilcox_paired') {
    # NON-PARAMETRIC, ACCEPTS MODELS WITH 2 FACTORS ONLY AND ONE FIXED VARIABLES. NESTING IS ASSUMED FROM THE ORDER OF THE DATA!
    # MODEL HAS TO BE OF THE FORM "a"
    if (length(term) > 1) {
      PrintMsg(paste('"error":"The model cannot include more than 1 variable when using Wilcoxon signed rank test. Please provide an appropriate model. The current model is: ',
               model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE)
    }
    samples <- unique(unlist(data[, model]))
    if (length(samples) != 2) {
      PrintMsg(paste('"error":"The Wilcoxon signed rank test can only perform two samples paired comparison. Please provide an appropriate model. the current model includes the following samples: ',
               paste(samples, collapse=', '),
               '."',
               sep=''),
               verbose, TRUE)
    }
    x <- data[data[, model] == samples[1], model]
    y <- data[data[, model] == samples[2], model]
    if (length(x) != length(y)) {
      PrintMsg(paste('"error":"A paired comparison in Wilcoxon signed rank test is possible only if samples have same size. Please provide an appropriate model, correct the map or use different statistics. Current samples are: ',
               samples[1],
               '(n=',
               length(x),
               '), ',
               samples[2],
               '(n=',
               length(y),
               ')."',
               sep=''),
               verbose, TRUE)
    }

    wilcox.output <- wilcox.test(formula=as.formula(formula), data=data, paired=TRUE)
    PrintMsg(paste('"description":"A Wilcoxon signed rank test was performed using the model: ',
                   model,
                   '. Paires: ',
                   paste(paste(row.names(data)[data[, model] == samples[1]], row.names(data)[data[, model] == samples[2]], sep='-'), collapse=', '),
                   '."', sep=''), verbose)

#    summary <- data.frame(row.names=paste(samples, collapse='-'),
    summary <- data.frame(row.names=model,
                          p.value=wilcox.output$p.value)

    pairwise.output <- list()
    pairwise.output[[model]] <- as.data.frame(wilcox.output$p.value)
    names(pairwise.output[[model]]) <- model
    row.names(pairwise.output[[model]]) <-paste(samples, collapse='-')

  } else if (method == 'ttest') {
    # PARAMETRIC, ACCEPTS MODELS WITH 2 FACTORS ONLY AND ONE FIXED VARIABLES.
    # MODEL HAS TO BE OF THE FORM "a"
    if (length(term) > 1) {
      PrintMsg(paste('"error":"The model cannot include more than 1 variable when using t-test. Please provide an appropriate model. The current model is: ',
               model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE)
    }
    samples <- unique(unlist(data[, model]))
    if (length(samples) != 2) {
      PrintMsg(paste('"error":"A t-test can only perform two samples comparison. Please provide an appropriate model. The current model includes the following samples: ',
               paste(samples, collapse=', '),
               '."',
               sep=''),
               verbose, TRUE)
    }

    ttest.output <- t.test(formula=as.formula(formula), data=data)
    PrintMsg(paste('"description":"A paired t-test was performed using the model: ',
                   model,
                   '."',
                   sep=''),
                   verbose)

#    summary <- data.frame(row.names=paste(samples, collapse='-'),
    summary <- data.frame(row.names=model,
                          p.value=ttest.output$p.value)

    pairwise.output <- list()
    pairwise.output[[model]] <- as.data.frame(ttest.output$p.value)
    names(pairwise.output[[model]]) <- model
    row.names(pairwise.output[[model]]) <-paste(samples, collapse='-')

  } else if (method == 'ttest_paired') {
    # PARAMETRIC, ACCEPTS MODELS WITH 2 FACTORS ONLY AND ONE FIXED VARIABLES. NESTING IS ASSUMED FROM THE ORDER OF THE DATA!
    # MODEL HAS TO BE OF THE FORM "a"
    if (length(term) > 1) {
      PrintMsg(paste('"error":"A model cannot include more than 1 variable when using t-test. Please provide an appropriate model. The current model is: ',
               model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE)
    }
    samples <- unique(unlist(data[, model]))
    if (length(samples) != 2) {
      PrintMsg(paste('"error":"A paired t-test can only perform two samples comparison. Please provide an appropriate model. The current model includes the following samples: ',
               paste(samples, collapse=', '),
               '."',
               sep=''),
               verbose, TRUE)
    }
    x <- data[data[, model] == samples[1], model]
    y <- data[data[, model] == samples[2], model]
    if (length(x) != length(y)) {
      PrintMsg(paste('"error":"A paired comparison in t-test is possible only if samples have same size. Please provide an appropriate model, correct the map or use different statistics. The current samples are: ',
               samples[1],
               '(n=',
               length(x),
               '), ',
               samples[2],
               '(n=',
               length(y),
               ')."',
               sep=''),
               verbose, TRUE)
    }

    ttest.output <- t.test(formula=as.formula(formula), data=data, paired=TRUE)
    PrintMsg(paste('"description":"A paired t-test was performed using the model: ',
                   model,
                   '. Paires: ',
                   paste(paste(row.names(data)[data[, model] == samples[1]], row.names(data)[data[, model] == samples[2]], sep='-'), collapse=', '),
                   '."', sep=''), verbose)

#    summary <- data.frame(row.names=paste(samples, collapse='-'),
    summary <- data.frame(row.names=model,
                          p.value=ttest.output$p.value)

    pairwise.output <- list()
    pairwise.output[[model]] <- as.data.frame(ttest.output$p.value)
    names(pairwise.output[[model]]) <- model
    row.names(pairwise.output[[model]]) <-paste(samples, collapse='-')
  }

  stat <- list(summary=summary, pairwise=pairwise.output)
  return (stat)
}

################
# Plot rarefaction curves
# Args:
#  (1) data      (numeric matrix) rarefaction table
# Output to device:
#  A figure
################
PlotRarefactionCurves <- function(data=NULL, xlab='Rarefaction depth', ylab='') {
  color.palette <- rainbow(ncol(data))
  plot(x=row.names(data),
       y=c(1:nrow(data)),
       type="n",
       ylim=c(min(data), max(data)),
       xlab=xlab,
       ylab=ylab)

  for (i in 1:ncol(data)) {
    lines(x=row.names(data),
          y=data[, i],
          col=color.palette[i])
  }
  return ()
}

################
# Perform a diversity analysis
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) map        (data-frame) Experimental design table with nrow=ncol(table)
#  (3) fun        (character) The diversity function to use
#  (4) model      (formula vector) A formula representing the model for stats. Terms
#                 must refer to names(map)
#  (5) json       (character) Json string specifying packages
#  (6) verbose    (character) Print messages? see PrintMsg() options
#  (7) graphical  (logical) Generate graphics?
# Returns:
#  Figure data as a json string
# Output to device:
#  A figure
# Prints to stdout:
#  A description of the function
################
AnalyseDiversity <- function(table=NULL, map=NULL, fun=c('richness', 'shannon'),
                             nrar=20, compare_diversity=FALSE, stats=NULL, model=NULL,
                             json=libJson, verbose=TRUE, graphical=TRUE) {
  if (is.null(fun)) {
    fun <- 'richness'
  }
  suppressMessages(library('vegan'))

  fun <- fun[fun != '']


  # Round counts if not integers
  if (length(table[table < 0]) > 0) {
    PrintMsg('"error":"Rarefaction is not applicable because negative values were detected."', verbose, TRUE)
    return ()
  }
  if (length(table[table%%1 > 0]) > 0) {
    PrintMsg('"warning":"Non-integer values were detected. In order to permit rarefaction, these values were rounded to the nearest integer."', verbose)
    table <- round(table, digits=0)
  }

  # Compute rarefaction curves for diversity
  colsums <- colSums(table)
  min.colsum <- floor(min(colsums))
  ncol <- ncol(table)
  sample <- matrix(rep(floor(seq(from=0, to=min.colsum, length.out=nrar)), ncol), ncol=ncol, byrow=FALSE)
  diversity <- list()
  map <- FilterMap(map=map, model=model)

  rarefied.table <- list()
  for (i in 1:nrar) {
    rarefied.table[[i]] <- rrarefy(t(table), sample=apply(rbind(sample[i, ], colsums), 2, min))
  }

  PrintMsg('"description":"This is a diversity analysis."', verbose)

  PrintMsg(paste('"description":"The dataset was rarefied until ', min.colsum, ' counts per sample."', sep=''), verbose)

  out <- list()
  out[['txt']] <- as.data.frame(matrix(ncol=length(fun), nrow=ncol))
  names(out[['txt']]) <- fun
  row.names(out[['txt']]) <- names(table)

  data.json <- c(1:length(fun))
  for (i in 1:length(fun)) {
    div <- matrix(ncol=ncol, nrow=nrar)
    v <- TRUE
    for (j in 1:nrar) {
      div[j, ] <- ComputeDiversity(table=t(rarefied.table[[j]]),
                                    fun=fun[i],
                                    json=json,
                                    verbose=v)
      v <- FALSE
    }
    colnames(div) <- colnames(table)
    row.names(div) <- sample[, 1]

    diversity[[i]] <- as.matrix(div)
    out[['txt']][, i] <- div[nrar, ]

    # Build figures
    if (graphical == TRUE) {
      PlotRarefactionCurves(div, ylab=fun[i])
    }

    # Calculate statistics
    if (compare_diversity == TRUE) {
      p.value <- list()
      for (j in 1:length(model)) {
        if (length(stats) == length(model)) {
          method <- stats[j]
        } else {
          method <- stats[1]
        }
        stat <- ComputeStats(response=div[nrar, ], map=map, method=method,
                             model=model[j], pairwise=FALSE, verbose=TRUE)$summary
        p <- as.data.frame(t(as.matrix(stat$p.value)))
        names(p) <- row.names(stat)
        p <- as.data.frame(p[, names(p) != 'Residuals'])
        # the following is usefull in case model contains only one element
        names(p) <- row.names(stat)[row.names(stat) != 'Residuals']
        row.names(p) <- fun[i]
        p.value[[j]] <- p
      }
      PrintMsg(paste('"description":"The statistics were performed at a rarefaction depth of ', min.colsum, ' counts per sample."', sep=''), verbose)
    } else {
      p.value <- NULL
    }
    data.json[i] <- paste('"', fun[i], '":', diversity2json(diversity[[i]], p.value, map), sep='')
  }

  out[['json']] <- paste('{', paste(data.json, collapse=','), '}', sep='')
  return (out)
}


################
# Permutational Multivariate Analysis of Variance Using Distance Matrices
# Args:
#  (1) model      (character)
#  (2) map        (data-frame) Experimental design table with nrow=ncol(table)
#  (3) fun        (character) The distance function to use
#  (4) graphical  (logical) Generate graphics?
# Returns:
#  A summary (data-frame)
# Prints to stdout:
#  A description of the function
################
Adonis <- function(table=NULL, model=NULL, strata=NULL, map=NULL,
                   fun='bray', graphical=TRUE) {

  if (!is.null(model)) {
    formula <- paste('table ~ ', model, sep='')
  } else {
    formula <- paste('table ~ ', paste(names(map), collapse='*'), sep='')}

  if (!is.null(strata)) {
    if (strata == '') {
      strata == NULL
    } else {
      #formula <- paste(formula, strata, sep=' / ')
      strata <- map[, strata]
    }
  }

  adonis.output <- adonis(formula=as.formula(formula), data=as.data.frame(map),
                          strata=strata, method=fun,
                          permutations=1000)

  summary <- data.frame(row.names=row.names(adonis.output$aov.tab))

  summary <- data.frame(row.names=gsub('[[:space:]]', '',
                                       row.names(adonis.output$aov.tab)),
                        p.value=adonis.output$aov.tab[, 'Pr(>F)'],
                        explained=adonis.output$aov.tab[, 'R2'])

  summary <- summary[row.names(summary) != 'Total', ]

  # Build the figure
  if (graphical == TRUE) {

    pie(summary$explained,
        labels=row.names(summary),
        col=rainbow(length(summary$explained)),
        init.angle=0,
        radius=0.7,
        cex=0.7,
        xpd=TRUE)
    legend(x=-1, y=-1, xjust=1, yjust=0.5,
           title='p-value',
           legend=round(summary$p.value, digits=4),
           fill=rainbow(nrow(summary)),
           cex=0.7, xpd=TRUE)
  }

  return (summary)
}


################
# Permutational Multivariate Analysis of Variance Using Distance Matrices
# Args:
#  (1) table           (numeric dataframe) Count table
#  (2) map             (data-frame) Experimental design table with nrow=ncol(table)
#  (3) fun             (character) The distance function to use
#  (4) model           (character vector) A formula representing the model for stats. Terms
#                      must refer to names(map)
#                      must refer to names(map)
#  (5) verbose         (character) Print messages? see PrintMsg() options
#  (6) graphical       (logical) Generate graphics?
# Returns:
#  Figure data as a json string
# Output to device:
#  A figure
# Prints to stdout:
#  A description of the function
################
PerformAdonis <- function(table=NULL, map=NULL, fun='bray',
                          model=NULL, strata=NULL,
                          json=libJson, verbose=TRUE, graphical=TRUE) {

  suppressMessages(library('vegan'))

  #out.json <- c(1:length(fun))
  #for (k in 1:length(fun)) {

  spec <- as.data.frame(do.call('rbind', json$distance))
  spec <- spec[spec$value == fun, ]
  package <- ''
  pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
  if (length(pkg) > 0) {
    package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
  }

  PrintMsg(paste('"description":"This is a permutational multivariate analysis of variance using the Adonis method (R package {vegan}) based on the ',
           spec$label,
           package,
           '."',
           sep=''),
           verbose)

  if ((nrow(table) < 4) || (ncol(table) < 4)) {
    PrintMsg('"error":"Not enough data to perform statistics."', TRUE, TRUE)
  }

  # Correct for negative entries by adding a scalar (Caillez correction method)
  if (min(table) < 0) {
    c <- abs(min(table))
    table <- table+c

    PrintMsg(paste('"description":"Negative values were corrected using a Caillez correction (',
             c,
             ' added to all values)."',
             sep=''),
             verbose)
  }

  if (is.null(strata) || strata == '') {
    strata <- rep(NULL, length(model))
  }
  if (!is.null(strata) && length(strata) != length(model)) {
    PrintMsg(paste('"warning":"Number of strata (',
                   length(strata),
                   ') differs from number of models(',
                   length(model),
                   '). No strata will be used."',
                   sep=''),
                   verbose)

    strata <- rep(NULL, length(model))
  }

  # Perform perMANOVA
  #map <- FilterMap(map=map, model=model)
  data.json <- c(1:length(model))
  for (i in 1:length(model)) {
    stat <- Adonis(table=t(table),
                   model=model[i], strata=strata[i], map=map,
                   fun=fun, graphical=graphical)

    stat.json <- c(1:nrow(stat))
    for (j in 1:nrow(stat)) {

      stat.json[j] <- paste('{"name":"',
                            row.names(stat)[j],
                            '","p-value":"',
                            stat$p.value[j],
                            '","explained":"',
                            stat$explained[j],
                            '"}',
                            sep='')
    }
    data.json[i] <- paste('"',
                     model[i],
                     '":[',
                     paste(stat.json, collapse=','),
                     ']',
                     sep='')
  }

  #out.json[k] <- paste('"',
  #                     fun[k],
  #                     '":"{',
  #                     paste(data.json, collapse=','),
  #                     '}',
  #                     sep='')
  #}
  #return (paste('{',paste(out.json, collapse=','), '}', sep=''))

  return (paste('{',paste(data.json, collapse=','), '}', sep=''))
}

################
# Principal Component Analysis (PCA)
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) map        (data-frame) Experimental design table with nrow=ncol(table)
#  (3) model      (character vector) A formula representing the model for stats. Terms
#                 must refer to names(map)
#  (4) col        (character) Name of a column in map to use to define colors
#  (5) biplot     (numeric) Show n=biplot factors with biggest norms on the
#                 biplot
#  (6) verbose    (character) Print messages? see PrintMsg() options
#  (7) graphical  (logical) Generate graphics?
# Returns:
#  Figure data as a json string
# Output to device:
#  A figure
# Prints to stdout:
#  A description of the function
################
PerformPCA <- function(table=NULL, map=NULL, biplot=TRUE,
                       verbose=TRUE, graphical=TRUE) {


  PrintMsg('"description":"This is a principal component analysis (PCA) (R package {FactoMineR})."',
           verbose)

  if ((nrow(table) < 2) || (ncol(table) < 2)) {
    PrintMsg('"error":"Not enough dimensions to perform dimmensional reduction."', TRUE, TRUE)
  }

  suppressMessages(library('FactoMineR'))

  ncp <- min(max(2, ncol(table)), 5)
  pca.output <- PCA(t(table), ncp=ncp, scale.unit=FALSE, graph=FALSE)

  if (graphical == TRUE) {
    plot(pca.output, new.plot=FALSE)
  }

  # Pack data into a json string
  data.ind <- as.data.frame(t(as.data.frame(pca.output$ind$coord)))
  row.names(data.ind) <- gsub('Dim.', 'PC', row.names(data.ind))

  out <- list()
  out[['txt']] <- as.data.frame(t(data.ind))
  out[['json']] <- Dataframe2DataJson(data=data.ind, xmetadata=map)

  eig.json <- c(1:ncp)
  for (i in 1:ncp) {
    eig.json[i] <- paste('"PC',
                         i,
                         '":"',
                         pca.output$eig[i, 2],
                         '"',
                         sep='')
  }

  if (biplot == TRUE) {
    data.var <- as.data.frame(t(as.data.frame(pca.output$var$coord)))
    row.names(data.var) <- gsub('Dim.', 'PC', row.names(data.var))
    out[['txt']] <- rbind(out[['txt']], as.data.frame(t(data.var)))
    out[['json']] <- paste('{"ind":',
                  out[['json']],
                  ',"var":',
                  Dataframe2DataJson(data=data.var),
                  ',"eig":{',
                  paste(eig.json, collapse=','),
                  '}}',
                  sep='')
  } else {
    out[['json']] <- paste('{"data":',
                  out[['json']],
                  ',"eig":{',
                  paste(eig.json, collapse=','),
                  '}}',
                  sep='')
  }

  return (out)
}

################
# Correspondence Analysis (CA)
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) map        (data-frame) Experimental design table with nrow=ncol(table)
#  (3) model      (character vector) A formula representing the model for stats. Terms
#                 must refer to names(map)
#  (4) col        (character) Name of a column in map to use to define colors
#  (5) biplot     (numeric) Show n=biplot factors with biggest norms on the
#                 biplot
#  (6) verbose    (character) Print messages? see PrintMsg() options
#  (7) graphical  (logical) Generate graphics?
# Returns:
#  Figure data as a json string
# Output to device:
#  A figure
# Prints to stdout:
#  A description of the function
################
PerformCA <- function(table=NULL, map=NULL,
                      verbose=TRUE, graphical=TRUE) {

  suppressMessages(library('FactoMineR'))

  ncp <- min(max(2, ncol(table)), 5)
  ca.output <- CA(table, ncp=ncp, graph=FALSE)

  if (graphical == TRUE) {
    plot(ca.output, new.plot=FALSE)
  }

  # Pack data into a json string
  data.ind <- as.data.frame(t(as.data.frame(ca.output$col$coord)))
  data.json <- Dataframe2DataJson(data=data.ind, xmetadata=map)

  eig.json <- c(1:ncp)
  for (i in 1:ncp) {
    eig.json[i] <- paste('"Dim',
                         i,
                         '":"',
                         ca.output$eig[i, 2],
                         '"',
                         sep='')
  }

  data.var <- t(as.data.frame(ca.output$row$coord))

  data.json <- paste('{"ind":',
                  data.json,
                  ',"var":',
                  Dataframe2DataJson(data=data.var),
                  ',"eig":{',
                  paste(eig.json, collapse=','),
                  '}}',
                  sep='')

  PrintMsg('"description":"This is a correspondence analysis (CA) (R package {FactoMineR})."',
           verbose)

  return (data.json)
}

################
# Principal Coordinates Analysis (PCoA)
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) map        (data-frame) Experimental design table with nrow=ncol(table)
#  (3) fun        (character) The distance function to use
#  (4) model      (character vector) A formula representing the model for stats. Terms
#                 must refer to names(map)
#  (5) col        (character) Name of a column in map to use to define colors
#  (6) json       (character) Json string specifying packages
#  (7) verbose    (character) Print messages? see PrintMsg() options
#  (8) graphical  (logical) Generate graphics?
# Returns:
#  Figure data as a json string
# Output to device:
#  A figure
# Prints to stdout:
#  A description of the function
################
PerformPCoA <- function(table=NULL, map=NULL, fun='bray',
                        json=libJson, verbose=TRUE, graphical=TRUE) {

  spec <- as.data.frame(do.call('rbind', json$distance))
  spec <- spec[spec$value == fun, ]
  package <- ''
  pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
  if (length(pkg) > 0) {
    for (i in 1:length(pkg)) {
      suppressMessages(library(pkg[i], character.only=TRUE))
    }
    package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
  }


  dist <- as.matrix(ComputeDistance(table=Transpose(table),
                                    fun=fun, json=json, verbose=FALSE))
  row.names(dist) <- names(table)

  out <- PerformPCA(table=dist, map=map, biplot=FALSE,
                          verbose=FALSE, graphical=graphical)

  PrintMsg(paste('"description":"This is a principal coordinate analysis (PCoA) (R package {FactoMineR}) based on ',
                 spec$label,
                 package,
                 '."',
                 sep=''),
           verbose)

  return (out)
}

################
# Canonical Correlation Analysis (CCA)
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) map        (data-frame) Experimental design table with nrow=ncol(table)
#  (3) model      (character vector) A formula representing the model for stats. Terms
#                 must refer to names(map)
#  (4) col        (character) Name of a column in map to use to define colors
#  (5) biplot     (numeric) Show n=biplot factors with biggest norms on the
#                 biplot
#  (6) verbose    (character) Print messages? see PrintMsg() options
#  (7) graphical  (logical) Generate graphics?
# Returns:
#  Figure data as a json string
# Output to device:
#  A figure
# Prints to stdout:
#  A description of the function
################
PerformCCA <- function(table=NULL, map=NULL, column=NULL,
                       verbose=TRUE, graphical=TRUE) {

  suppressMessages(library('vegan'))

  table <- as.data.frame(t(table))
  names <- unlist(strsplit(column, split=','))
  categs <- as.data.frame(map[, names])
  names(categs) <- names
  row.names(categs) <- row.names(map)

  cca.output <- cca(table ~ ., data=categs)

  if (graphical == TRUE) {
    plot(cca.output)
  }

  # Pack data into a json string
  data.ind <- as.data.frame(t(cca.output$CCA$u))
  data.var <- as.data.frame(t(cca.output$CCA$v))
  data.biplot <- as.data.frame(t(cca.output$CCA$biplot))
  data.json <- paste('{"ind":',
                  Dataframe2DataJson(data=data.ind, xmetadata=map),
                  ',"var":',
                  Dataframe2DataJson(data=data.var),
                  ',"biplot":',
                  Dataframe2DataJson(data=data.biplot),
                  '}',
                  sep='')

  PrintMsg(paste('"description":"This is a canonical correspondence analysis (CCA) (R package {vegan}) with the following constraining variable(s):',
           column,
           '."',
           sep=''),
           verbose)

  return (data.json)
}

################
# Compute fold-change
# Args:
#  (1) response  (numeric vector) Data
#  (2) factor    (vector) Experimental design vector with
#                nrow=length(response)
#  (3) by        (dataframe) A nested variable to use for calculating
#                fold-changes
#  (4) balanced  (bool) assume that model balanced? If NULL, balance will be tested
#  (5) verbose   (character) Print messages? see PrintMsg() options
# Returns:
#  A data-frame containing fold-changes for each variable in model
# Prints to stdout:
#  A description of the function
################
ComputeFoldChange <- function(group1=NULL, group2=NULL, names=NULL, verbose=TRUE) {

  out <- list()
  out[['logfc']] <- log2(mean(group2))-log2(mean(group1))
  out[['ab']] <- mean(c(group2, group1))

  PrintMsg(paste('"description":"Fold-change is calculated as log2(mean(',
                 names[2],
                 '))-log2(mean(',
                 names[1],
                 ')) and abundance is calculated as (mean(',
                 names[1],
                 ')+mean(',
                 names[2],
                 '))/2."',
                 sep=''),
           verbose)
  return(out)
}


################
# Extract group of samples from map
# Args:
#  (1) map      (data-frame) Experimental design table with nrow=ncol(table)
# Returns:
# Prints to stdout:
################
ExtractGroupFromMap <- function(map=NULL, column=NULL, group=NULL, verbose) {
  map <- as.data.frame(FilterMap(map=map, model=column))
  group <- unlist(strsplit(group, split=':'))
  if (ncol(map) != length(column)) {
    PrintMsg('"error":"Unable to determine groups for fold-change analysis. Make sure no - or : character is present in the mapping file."'
             , verbose)
  }
  keep <- 1
  for (i in 1:ncol(map)) {
    keep <- keep*(map[, i] == group[i])
  }
  return(keep)
}

################
# Analyse changes (fold-changes, p-values etc.)
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) map        (data-frame) Experimental design table with nrow=ncol(table)
#  (3) model      (character vector) A formula representing the model for stats. Terms
#                 must refer to names(map)
#  (4) json       (character) Json string specifying packages
#  (5) verbose    (character) Print messages? see PrintMsg() options
# Returns:
#  Figure data as a json string
# Prints to stdout:
#  A description of the function
################
AnalyseChange <- function(table=NULL, map=NULL,
                          stats=NULL, model=NULL,
                          json=libJson, verbose=TRUE,
                          graphical=FALSE) {

  PrintMsg('"description":"This is a differential analysis (fold-changes and statistical tests)."',
           verbose)
  # Calculate statistics
  if (is.null(model)) {
    model <- paste(names(map), sep='+')
  }
  nmodel <- length(model)
  data.json <- c(1:nmodel)
  out <- list()
  for (i in 1:nmodel) {
    if (length(stats) != length(model)) {
      method = stats[1]
    } else {
      method = stats[i]
    }
    nrow <- nrow(table)
    effect.json <- list()
    v <- verbose
    graphic.data <- list()
    for (j in 1:nrow) {
      stat <- ComputeStats(response=as.vector(unlist(table[j, ])), map=map,
                           method=method, model=model[i], pairwise=TRUE,
                           verbose=v)$pairwise

        effect.names <- names(stat)
        neffect <- length(effect.names)
        for (k in 1:neffect) {
          if (j == 1 && k == 1) {
            graphic.data[[effect.names[k]]] <- list()
          }
          p <- unlist(stat[[k]][, ncol(stat[[k]])])
          if (length(effect.json) == 0) {
            effect.json[[effect.names[k]]] <- list()
          }
          comp.names <- row.names(stat[[k]])
          ncomp <- length(comp.names)
          for (l in 1:ncomp) {
            if (j == 1 && l == 1) {
              graphic.data[[effect.names[k]]][[comp.names[l]]] <- data.frame(logfc=c(1:nrow), ab=c(1:nrow))
              row.names(graphic.data[[effect.names[k]]][[comp.names[l]]]) <- row.names(table)
            }
            if (length(effect.json[[effect.names[k]]]) == 0) {
              effect.json[[effect.names[k]]][[comp.names[l]]] <- list()
              effect.json[[effect.names[k]]][[comp.names[l]]] <- c(1:nrow)
            }
            group.names <- unlist(strsplit(row.names(stat[[k]])[l], split='-'))
            if (length(group.names) != 2) {
              PrintMsg('"error":"Unable to determine groups for fold-change analysis. Make sure no - or : character is present in the mapping file."', verbose)
            }

            group1 <- ExtractGroupFromMap(map=map, column=names(stat)[k], group=group.names[1], verbose)
            group2 <- ExtractGroupFromMap(map=map, column=names(stat)[k], group=group.names[2], verbose)

            fc <- ComputeFoldChange(as.vector(unlist(table[j, group1 == 1])),
                                    as.vector(unlist(table[j, group2 == 1])),
                                    names=group.names,
                                    verbose=v)

            effect.json[[effect.names[k]]][[comp.names[l]]][j] <- paste('{"mean abundance":"',
                                                                        fc$ab,
                                                                        '","log2(fold-change)":"',
                                                                        fc$logfc,
                                                                        '","-log2(p-value)":"',
                                                                        -log2(p[l]),
                                                                        '"}',
                                                                        sep='')
            graphic.data[[effect.names[k]]][[comp.names[l]]]$logfc[j] <- fc$logfc
            graphic.data[[effect.names[k]]][[comp.names[l]]]$ab[j] <- fc$ab
          }
        }
        v <- FALSE
      }


      for (k in 1:length(effect.json)) {
        for (l in 1:length(effect.json[[k]])) {
          effect.json[[k]][[l]] <- paste('"',
                                         names(effect.json[[k]])[l],
                                         '":[',
                                         paste(effect.json[[k]][[l]], collapse=','),
                                         ']',
                                         sep='')

          if (graphical == TRUE) {
            plot(graphic.data[[k]][[l]]$ab,
                 graphic.data[[k]][[l]]$logfc,
                 main=names(effect.json[[k]])[l],
                 xlab='mean abundance',
                 ylab='log2(fold-change)')
          }
        }
        effect.json[[k]] <- paste('"',
                                  names(effect.json)[k],
                                  '":{',
                                  paste(effect.json[[k]], collapse=','),
                                  '}',
                                  sep='')
      }


      data.json[i] <- paste('"',
                            model[i],
                            '":{',
                            paste(effect.json, collapse=','),
                            '}',
                            sep='')
    }

  out[['json']] <- paste('{"data":{',
               paste(data.json, collapse=','),
               '},"names":["',
               paste(row.names(table), collapse='","'),
               '"]}',
               sep='')
  return(out)

}

################
# Build a heatmap of abundances
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) map        (data-frame) Experimental design table with nrow=ncol(table)
#  (3) model      (character vector) A formula representing the model for stats. Terms
#                 must refer to names(map)
#  (4) metadata   (numeric dataframe) Metadata table
#  (5) fun        (character) Function to use in hierarchical clustering
#  (6) json       (character) Json string specifying packages
#  (7) verbose    (character) Print messages? see PrintMsg() options
#  (8) graphical  (logical) Generate graphics?
# Returns:
#  Figure data as a json string
# Output to device:
#  A figure
# Prints to stdout:
#  A description of the function
################
BuildHeatMap <- function(table=NULL, map=NULL, stats='anova',
                         model=NULL, secondary=NULL,
                         fun='spearman',
                         json=libJson, verbose=TRUE, graphical=TRUE) {

  suppressMessages(library('gplots'))

  PrintMsg('"description":"This is a heat-map of abundances."',
           verbose=verbose)

  # Check that there is no constant data
  RemoveConstantRows(table=table, verbose=TRUE)

  if ((nrow(table) < 3) || (ncol(table) < 3)) {
    PrintMsg('"error":"Not enough data to draw a heatmap (min 3 rows and 3 columns)."', TRUE, TRUE)
  }

  # Center/scale by row
  data <- as.data.frame(Transpose(apply(table, 1, cScale)))

  names(data) <- names(table)
  row.names(data) <- row.names(table)
  PrintMsg('"description":"Abundances are scaled on standard deviation and centered on mean per row."',
           verbose=verbose)

  # keep only columns of map that are used in the model
  map <- FilterMap(map=map, model=model)

  # Set function for hierarchical clustering
  DistFun <- function(x) {
    return (ComputeDistance(table=x,
                            fun=fun,
                            json=json,
                            verbose=FALSE))
  }

  if (graphical == FALSE) {
    pdf(file = NULL)
  }
    # Set the color palette
    col <- colorRampPalette(c("navy", "seashell", "red2"))(n=51)

    heatmap.out <- heatmap.2(as.matrix(data),
                scale='row',
                na.rm=TRUE,
                Rowv=TRUE,
                Colv=TRUE,
                dendrogram='both',
                #distfun=DistFun,
                col=col,
                symm=FALSE,
                symkey=FALSE,
                key=TRUE,
                keysize=1.5,
                density.info='density',
                trace='none',
                labCol=names(data),
                labRow=row.names(data),
                cexCol=0.2+1/log10(ncol(data)),
                mar=c(5, 10))

  if (graphical == FALSE) {
    dev.off()
  }

  # Order data according to dendrogram
  data <- data[heatmap.out$rowInd, ]
  data <- data[, heatmap.out$colInd]
  names <- names(map)
  map <- as.data.frame(map[heatmap.out$colInd, ])
  names(map) <- names
  if (!is.null(secondary)) {
    secondary <- secondary[, heatmap.out$colInd]
  }

  # Store heatmap data into a json string
  ncol <- ncol(data)
  heatmap.json <- c(1:ncol)
  data.names <- names(data)
  for (i in 1:ncol) {
    heatmap.json[i] <- paste('{"name":"',
                             data.names[i],
                             '","value":["',
                             paste(data[, i], collapse='","'),
                             '"]}',
                             sep='')
  }

  # Add categories to topbar
  topbar.json <- list()
  ncateg <- ncol(map)
  category.json <- c(1:ncateg)
  categ.names <- names(map)


  for (i in 1:ncateg) {
    category.json[i] <- paste('{"name":"',
                              categ.names[i],
                              '","value":["',
                              paste(map[, i], collapse='","'),
                              '"]}',
                              sep='')
  }
  topbar.json[['category']] <- paste('"category":[',
                                paste(category.json, collapse=','),
                                ']',
                                sep='')


  # Add p-values to sidebar
  sidebar.json <- list()

  if (is.null(model)) {
    model <- paste(names(map), sep='+')
  }

    nmodel <- length(model)
    stat.json <- c(1:nmodel)
    for (i in 1:nmodel) {
      if (length(stats) != length(model)) {
        method = stats[1]
      } else {
        method = stats[i]
      }
      nrow <- nrow(data)
      v <- verbose
      p.value <- list()
      for (j in 1:nrow) {
        stat <- ComputeStats(response=as.vector(unlist(data[j, ])), map=map,
                             method=method, model=model[i], pairwise=FALSE,
                             verbose=v)$summary

        stat.rownames <- row.names(stat)[row.names(stat) != 'Residuals']
        stat <- as.data.frame(stat[row.names(stat) != 'Residuals', ])

        nstat <- nrow(stat)
        for (k in 1:nstat) {
          if (is.null(p.value[[stat.rownames[k]]])) {
            p.value[[stat.rownames[k]]] <- c(1:nrow)
          }
          p.value[[stat.rownames[k]]][j] <- stat[k, ]
        }
        v <- FALSE
      }
      for (j in 1:length(p.value)) {
        p.value[[j]] <- paste('{"name":"',
                              names(p.value)[j],
                              '","value":["',
                              paste(p.value[[j]], collapse='","'),
                              '"]}',
                              sep='')
      }
      stat.json[i] <- paste('"',
                            model[i],
                            '":[',
                            paste(p.value, collapse=','),
                            ']',
                            sep='')
    }

  sidebar.json[['p-values']] <- paste('"p-values":{',
                                 paste(stat.json, collapse=','),
                                 '}',
                                 sep='')

  # Add correlations with secondary dataset to sidebar
  if (! is.null(secondary)) {

    PrintMsg('"description":"The correlations between the primary and secondary datasets were calculated using the Spearman rank correlation."',
             verbose=verbose)

    correlation <- as.data.frame(cor(x=t(data), y=t(secondary), use='na.or.complete', method ="spearman"))

    ncor <- ncol(correlation)
    cor.json <- c(1:ncor)
    for (i in 1:ncor) {
      cor.json[i] <- paste('{"name":"',
                           names(correlation)[i],
                           '","value":["',
                           paste(correlation[, i], collapse='","'),
                           '"]}',
                           sep='')
    }
    sidebar.json[['correlations']] <- paste('"correlations":[',
                             paste(cor.json, collapse=','),
                             ']',
                             sep='')
  }

  out <- list()
  out[['txt']] <- as.data.frame(data)
  out[['json']] <- paste('{"heatmap":[',
                     paste(heatmap.json, collapse=','),
                     '],"colnames":["',
                     paste(names(data), collapse='","'),
                     '"],"rownames":["',
                     paste(row.names(data), collapse='","'),
                     '"],"topbar":{',
                     paste(topbar.json, collapse=','),
                     '},"sidebar":{',
                     paste(sidebar.json, collapse=','),
                     '},"colDendrogram":',
                     dendrogram2json(heatmap.out$colDendrogram)[[1]],
                     ',"rowDendrogram":',
                     dendrogram2json(heatmap.out$rowDendrogram)[[1]],
                     '}',
                     sep='')

  return (out)
}

################
# Get the groups with the highest mean
# Args:
#  (1) data     (numeric vector) The data
#  (2) map      (data-frame) Experimental design table with nrow=length(data)
#  (3) model    (character) A formula representing the model for stats. Terms
#               must refer to names(map)
#  (4) verbose  (character) Print messages? see PrintMsg() options
# Returns:
#  A character vector with length=numer of terms in model
# Prints to stdout:
#  A description of the function
################
getGroupWithHighestMean <- function(data=NULL, map=NULL, column=NULL, verbose=TRUE) {

  PrintMsg('"description":"Determined group with highest mean abundance."',
           verbose=verbose)
  map <- as.data.frame(map[, unlist(strsplit(column, split=':'))])
  if (ncol(map) == 1) {
    names(map) <- column
  }

  group <- as.data.frame(unique(map))
  if (ncol(group) == 1) {
    names(group) <- column
  }

  mean <- c(1:nrow(group))
  for (i in 1:nrow(group)) {
    keep <- 1
    for (j in 1:ncol(map)) {
      keep <- keep*(map[, j] %in% group[i, names(map)[j]])
    }
    mean[i] <- mean(unlist(data[as.logical(keep)]))
  }
  out <- list()
  out$value <- max(mean)
  out$id <- paste(unlist(group[which(mean == max(mean)), ]), collapse=':')
  out$group <- apply(group, 1, paste, collapse=':')

  return (out)
}

################
# Build a network of correlations for variables
# Args:
#  (1) table      (numeric dataframe) Count table
#  (2) map        (data-frame) Experimental design table with nrow=ncol(table)
#  (3) model      (character) A formula representing the model for stats. Terms
#                 must refer to names(map)
#  (4) metadata   (numeric dataframe) Metadata table
#  (5) fun        (character) Distance, correlation or similarity function
#  (6) json       (character) Json string specifying packages
#  (7) verbose    (character) Print messages? see PrintMsg() options
# Returns:
#  Figure data as a json string
# Prints to stdout:
#  A description of the function
################
BuildCorrelationNetwork <- function(table=NULL, map=NULL, stats='anova',
                                    model=NULL, secondary=NULL, fun='spearman',
                                    json=libJson, verbose=TRUE) {

  spec <- as.data.frame(do.call('rbind', json$correlation))
  spec <- spec[spec$value == fun, ]
  pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
  package <- ''
  if (length(pkg) > 0) {
    for (i in 1:length(pkg)) {
      suppressMessages(library(pkg[i], character.only=TRUE))
    }
    package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
  }

  PrintMsg(paste('"description":"This is a correlation network. It was built using ',
                        spec$label, package, '."', sep=''),
                        verbose=verbose)

  suppressMessages(library(igraph))


  # keep only columns of map that are used in the model
  #map <- FilterMap(map=map, model=model)

  # Combine with secondary dataset
  data <- rbind(table, secondary)
  data.type <- rep('primary dataset', nrow(data))
  if (!is.null(secondary)) {
    data.type[(nrow(table)+1):nrow(data)] <- "secondary dataset"
  }

  # Check that there is no constant data
  RemoveConstantRows(table=data, verbose=TRUE)

  if ((nrow(data) < 2) || (ncol(data) < 3)) {
    PrintMsg('"error":"Not enough data to compute a network (min 2 rows and 3 columns)."', TRUE, TRUE)
  }

  # Calculate the correlation matrix
  weight <- ComputeCorrelation(table=data, fun=fun, test=TRUE,
                      json=json, verbose=FALSE)

  nmodel <- length(model)
  nnodes <- nrow(data)
  node.json <- c(1:nnodes)
  link.json <- c(1:(nnodes*(nnodes/2-1)))
  l <- 1
  legend.json <- list()
  for (i in 1:nnodes) {
    model.json <- c(1:nmodel)
    for (j in 1:nmodel) {
      if (length(stats) != length(model)) {
        method = stats[1]
      } else {
        method = stats[j]
      }

      stat <- ComputeStats(response=as.vector(unlist(data[i, ])), map=map,
                           method=method, model=model[j], pairwise=FALSE,
                           verbose=FALSE)$summary
      neffect <- length(row.names(stat)[row.names(stat) != 'Residuals'])
      effect.json <- c(1:neffect)
      if (length(legend.json[[model[j]]]) == 0) {
        legend.json[[model[j]]] <- c(1:neffect)
      }
      for (k in 1:neffect) {
        gwhm <- getGroupWithHighestMean(data=data[i, ], map=map, column=row.names(stat)[k], verbose=FALSE)
        effect.json[k] <- paste('"',
                              row.names(stat)[k],
                              '":{"p-value":"',
                              stat[k, ],
                              '","highest-mean":"',
                              gwhm$id,
                              '"}',
                              sep='')
        if (i == 1) {
          legend.json[[model[j]]][k] <- paste('"',
                                              row.names(stat)[k],
                                              '":["',
                                              paste(gwhm$group, collapse='","'),
                                              '"]',
                                              sep='')
        }
      }
      model.json[j] <- paste('"',
                             model[j],
                             '":{',
                             paste(effect.json, collapse=','),
                             '}',
                             sep='')
      if (i == 1) {
        legend.json[[model[j]]] <- paste('"',
                                         model[j],
                                         '":{',
                                         paste(legend.json[[model[j]]], collapse=','),
                                         '}',
                                         sep='')
      }
    }
    node.json[i] <- paste('{"id":"',
                          i-1,
                          '","name":"',
                          row.names(data)[i],
                          '","data-type":"',
                          data.type[i],
                          '","stat":{',
                          paste(model.json, collapse=','),
                          '},"mean":"',
                          mean(unlist(data[i, ])),
                          '","min":"',
                          min(unlist(data[i, ])),
                          '","max":"',
                          max(unlist(data[i, ])),
                          '"}',
                          sep='')
    if (i < nnodes) {
      for (j in (i+1):nnodes) {
        link.json[l] <- paste('{"source":"',
                               i-1,
                               '","target":"',
                               j-1,
                               '","weight":"',
                               weight$estimate[i, j],
                               '","p-value":"',
                               weight$p.value[i, j],
                               '"}',
                               sep='')

        l <- l+1
      }
    }
  }

  out <- list()
  out[['txt']] <- as.data.frame(weight$estimate)
  names(out[['txt']]) <- row.names(data)
  row.names(out[['txt']]) <- row.names(data)
  out[['json']] <- paste('{"nodes":[',
                paste(node.json, collapse=','),
                '],"links":[',
                paste(link.json, collapse=','),
                '],"legend":{',
                paste(unlist(legend.json), collapse=','),
                '}}',
                sep='')
  return (out)

}

################
# Build a network of similarity for samples
# Args:
#  (1) tables   (list of numeric dataframes)
#  (2) map      (data-frame) Experimental design table with nrow=ncol(table)
#  (3) model    (character) A formula representing the model for stats. Terms
#               must refer to names(map)
#  (4) funs     (list of characters) Distance or similarity functions
#  (5) clust    (character) Network clustering algorithm
#  (6) json     (character) Json string specifying packages
#  (7) verbose  (character) Print messages? see PrintMsg() options
#  (8) lib      (character) Path to holynet library
# Returns:
#  List [1] json string (fusion network) [2] data-frame (clusters) 
# Prints to stdout:
#  A description of the function
################
BuildSimilarityNetwork <- function(table=NULL, map=NULL, funs=NULL,
                                   clust="walktrap", clust.names=NULL,
                                   json=libJson, verbose=TRUE,
                                   lib='holynetlib.R') {
  source(lib)

  spec <- as.data.frame(do.call('rbind', json$distance))
  for (i in 1:length(table)) {
    fun <- funs[1]
    if ((length(funs) == length(table)) && (length(funs) > 1)) {
      fun <- funs[i]
    }
    spec <- spec[spec$value == fun, ]
    pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
    package <- ''
    if (length(pkg) > 0) {
      for (j in 1:length(pkg)) {
        suppressMessages(library(pkg[j], character.only=TRUE))
      }
      package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
    }

    PrintMsg(paste('"description":"This is a similarity network. It was built using ',
             spec$label, package, '."', sep=''),
             verbose=verbose)
  }

  # Calculate similarity matrices
  mat <- list()
  for (i in 1:length(table)) {

    if ((nrow(table[[i]]) < 3) || (ncol(table[[i]]) < 3)) {
      PrintMsg('"warning":"Not enough data to compute a network (min 3 rows and 3 columns)."', TRUE)
      mat[[i]] <- NULL
    } else {
      fun <- funs[1]
      if (length(funs) == length(table)) {
        fun <- funs[i]
      }
      mat[[i]] <- CreateAdjacencyMatrix(CreateSimilarityMatrix(table[[i]], fun))
    }
  }

  # If multiple matrices exist, fuse them
  if (length(mat) > 1) {
    fusmat <- FuseMatrices(mat)
    if (is.null(fusmat)) {
      PrintMsg('"error":"Trying to fuse matrices but dimensions are not matching."', TRUE, TRUE)
    }
    mat[[(length(mat)+1)]] <- fusmat
  }

  out <- list()

  # Compute clustering on similarity matrices
  if (!is.null(clust) && (clust != 'none')) {

    spec <- as.data.frame(do.call('rbind', json$graph_clustering))
    spec <- spec[spec$value == clust, ]
    pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
    package <- ''
    if (length(pkg) > 0) {
      for (i in 1:length(pkg)) {
        suppressMessages(library(pkg[i], character.only=TRUE))
      }
      package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
    }

    PrintMsg(paste('"description":"Clusters were determined using the ',
                          spec$label, package, '."', sep=''),
                          verbose=verbose)

    clusters <- matrix(ncol=length(mat), nrow=nrow(mat[[1]]))
    for (i in 1:length(mat)) {
      clusters[, i] <- paste(clust,
                             as.character(ApplyNetworkClustering(CreateNetwork(mat[[i]]), clust_method=clust)),
                             sep='')
    }
    clusters <- as.data.frame(clusters)
    if (!is.null(clust.names)) {
      names(clusters) <- clust.names
    }
    out[['txt']] <- clusters
    map <- cbind(map, clusters)
  }

  # Pack data into a json string
  json.data <- buildFusionNetworkJson(mat, map)

  out[['json']] <- json.data
  return (out)
}

#################################
#        (\/)(°,,,°)(\/)        #
#################################
libJson <- ReadJson()