diff --git a/app/assets/stylesheets/general.css.scss b/app/assets/stylesheets/general.css.scss index 9f2c101..2fb7711 100644 --- a/app/assets/stylesheets/general.css.scss +++ b/app/assets/stylesheets/general.css.scss @@ -1,350 +1,355 @@ .main-container { height:auto; min-height:100vh; padding-top:1rem; } +.pipeline-figure { + height:80vh; + width:auto; +} + .doc-table tr td, .doc-table tr th, .version-table tr td { text-align:left } .border-left { border-left:1px solid #ccc; } .border-right { border-right:1px solid #ccc; } .border-top { border-top:1px solid #ccc; } .border-bottom { border-top:1px solid #ccc; } .inline-block { display:inline-block; } .navbar.navbar-top { -moz-box-shadow: 0 2px 2px #808080; -webkit-box-shadow: 0 2px 2px #808080; box-shadow: 0 2px 2px #808080; } .navbar.navbar-bottom { border-top:1px solid #ccc; } .side-index-container { padding:0; z-index:0; } .side-index-container .side-index { height:100vh; overflow:auto; width:100%; } .side-index-container .side-index-list { margin-bottom:50vh; padding-left:5px; padding-right:5px; padding-top:1rem; } .topbar-padding { padding-top:56px; } .topbar-margin { margin-top:56px; } .dropdown-item { cursor:pointer; } .float-right { float:right } .align-center { text-align:center; } .align-justify { text-align:justify; } .align-left { text-align:left; } .align-right { text-align:right; } ul.no-bullets li { list-style-type:none; } .full-width { width:100%; } .full-height { height:100%; } .align-items-center { align-items:center; } .hidden { display:none; } .row { display: -webkit-box; display: -webkit-flex; display: -ms-flexbox; display: flex; -webkit-flex-wrap: wrap; -ms-flex-wrap: wrap; flex-wrap: wrap; } .video-wrapper { position: relative; padding-bottom: 56.25%; /* 16:9 */ padding-top: 25px; height: 0; iframe { position: absolute; top: 0; left: 0; width: 100%; height: 100%; } } #popup_window { position: absolute; z-index:100000; padding:15px; display: none; background: #ccc; border: 1px solid; } #popup_window_close { float:right; font-size:20px; margin-top:-19px; margin-right:-10px; cursor:pointer; } .tip-window-container { position:relative; display:block; width:0; height:0; } .tip-window { position: absolute; display:block; z-index:1; margin: 15px 0 0 0; padding:10px; background: #e6e6e6; border: 1px solid #333; color:#333; width:300px; text-align:left; white-space:normal; } .tip-window:before, .tip-window:after { position:absolute; content:""; width:0; height:0; } .tip-window.right:before, .tip-window.right:after { right:10px; } .tip-window.left:before, .tip-window.left:after { left:10px; } .tip-window:before { top:-15px; border-bottom:15px solid #333; border-left:15px solid transparent; border-right:15px solid transparent; } .tip-window:after { top:-14px; z-index:1; border-bottom:15px solid #e6e6e6; border-left:15px solid transparent; border-right:15px solid transparent; } .tip-window-close { cursor:pointer; } .title_popup {white-space:nowrap;font-weight:bold} .infos { position:static; } .background-col-1 { background-color:#e6e6e6; } .background-col-2 { color:#fff; background-color:#999; } .row-padding-10 { padding-top:10vh; padding-bottom:10vh; } .row-padding-5 { padding-top:5vh; padding-bottom:5vh; } .row-padding-2 { padding-top:2vh; padding-bottom:2vh; } .row-padding-bottom-20 { padding-bottom:20vh; } .v-align-middle { height:100%; transform:translate(0, 35%); } .back-to-top { color:#999; text-decoration: none; &:hover, &:focus, &:active { color:#ccc; text-decoration: none; } } .content { position:static; display: table; 
margin-right: auto; margin-left: auto; margin-top: 60px; margin-bottom: 60px; .left { float:left; margin-right:20px; } .right { float:right; margin-left:20px; } .align-left { text-align:left; } .align-right { text-align:right; } .centered-div { width: 260px; line-height: 300px; } .centered-span { display: inline-block; vertical-align: middle; line-height: normal; } } a { color: #333; &.white { color: #fff; } &:link, &:visited { text-decoration: none; } &:hover { text-decoration: underline; &.button, &.tab, &.subtab, &.help-button, &.slider, &.slide-open, &.slide-close, &.test-button { text-decoration: none; } &.slide-open, &.slide-close { text-shadow: 1px 0px #333, -1px -0px #333, 0px -1px #333, 0px 1px #333; } } } h1 { font-weight: 500; font-size: 28px; line-height: 34px; } h2 { font-weight: 500; font-size: 24px; line-height: 30px; } h3 { font-weight: 500; font-size: 20px; line-height: 25px; } pre { font-family: verdana, arial, helvetica, sans-serif; background-color: #eee; padding: 10px; font-size: 12px; white-space: pre-wrap; white-space: -moz-pre-wrap; white-space: -pre-wrap; white-space: -o-pre-wrap; word-wrap: break-word; } diff --git a/app/controllers/jobs_controller.rb b/app/controllers/jobs_controller.rb index a6e85b5..e6b667c 100644 --- a/app/controllers/jobs_controller.rb +++ b/app/controllers/jobs_controller.rb @@ -1,832 +1,791 @@ class JobsController < ApplicationController before_action :set_job, only: [:show, :edit, :update, :destroy, :serve, :serve_archive, :serve_stderr, :view, :refresh, :clone] - before_action :check_storage_limit, only: [:new, :create] + before_action :check_storage_limit, only: [:new, :create, :update, :clone] # before_action :authenticate_user!, only: [:show, :edit, :update, :destroy, :serve] def check_storage_limit if (controller_name == 'jobs' and ['create', 'new', 'update'].include? action_name) if current_user and current_user.storage_quota > 0 and current_user.total_jobs_size > current_user.storage_quota flash[:notice] = "Sorry but you exceeded your storage quota. Please clean up old analyses first." redirect_to root_url end end end def kill_job j tmp_dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + j.user_id.to_s + j.key main_pid = File.read(tmp_dir + ".pid") if File.exist?(tmp_dir + ".pid") # kill job if one already running existing_main_job = `ps -ef | grep #{j.key} | grep #{main_pid} | grep -v 'grep'` puts "Existing main job: " + existing_main_job.to_json # pids=[] if main_pid and !existing_main_job.empty? lines = `ps -ef | grep #{j.key} | grep -v 'grep'`.split("\n").select{|l| !l.empty?} pids = lines.map{|l| t= l.split(/\s+/); t[1]} pids.unshift(main_pid) if pids.size > 0 pids.each do |pid| cmd = "kill #{pid}" `#{cmd}` end end end # delete log.json log_json = tmp_dir + "output" + "log.json" #logger.debug("DEBUG: " + log_json.to_s) File.delete log_json if File.exist? log_json end def clone tmp_dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @job.user_id.to_s + @job.key @new_job = nil if current_user @new_job = @job.dup session[:current_key] = create_key() @new_job.key = session[:current_key] else current_job = Job.where(:key => session[:current_key]).first if current_job ### kill current job kill_job(current_job) @new_job = current_job @new_job.update_attributes(:form_json => @job.form_json, :output_json => @job.output_json, :name => @job.name, :description => @job.description, :status => @job.status) else @new_job = @job.dup @new_job.key = session[:current_key] end end ### clone @new_job.sandbox = (current_user) ? 
false : true @new_job.user_id = (current_user) ? current_user.id : 1 new_tmp_dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @new_job.user_id.to_s Dir.mkdir(new_tmp_dir) if !File.exist? new_tmp_dir new_tmp_dir += @new_job.key if File.exist? new_tmp_dir FileUtils.rm_r new_tmp_dir Dir.mkdir new_tmp_dir end FileUtils.cp_r tmp_dir.to_s + "/.", new_tmp_dir ### rename cloned archive FileUtils.mv (new_tmp_dir + (@job.key + "." + APP_CONFIG[:archive_format])), (new_tmp_dir + (@new_job.key + "." + APP_CONFIG[:archive_format])) if File.exist?(new_tmp_dir + (@job.key + "." + APP_CONFIG[:archive_format])) ### change filepaths in the result file cmd = "perl -i -pe 's/\\/#{@job.user_id}\\/#{@job.key}\\//\\/#{@new_job.user_id}\\/#{@new_job.key}\\//g' #{new_tmp_dir}/output/log.json" #logger.debug("CMD: #{cmd}") `#{cmd}` ### delete tmp_dir FileUtils.rm_r new_tmp_dir + "tmp" if File.exist?(new_tmp_dir + "tmp") @new_job.name += " copy" if @new_job.save redirect_to job_path(@new_job.key) end end def serve if readable? @job sleep 0.5 - job_dir = Pathname.new(APP_CONFIG[:data_dir]) + 'users' + @job.user.id.to_s + @job.key + job_dir = Pathname.new(APP_CONFIG[:data_dir]) + 'users' + @job.user_id.to_s + @job.key filename = params[:filename] filepath = job_dir + filename if File.exist?(filepath) send_file filepath.to_s, type: params[:content_type] || 'text', disposition: (!params[:display]) ? ("attachment; filename=" + filename.gsub("/", "_")) : '' else flash[:notice] = 'Sorry, this file is no longer available, please try again later.' redirect_to job_path(@job.key) end end end def serve_archive if readable? @job `rails make_job_archive[#{@job.key}]` - job_dir = Pathname.new(APP_CONFIG[:data_dir]) + 'users' + @job.user.id.to_s + @job.key + job_dir = Pathname.new(APP_CONFIG[:data_dir]) + 'users' + @job.user_id.to_s + @job.key filename = "#{@job.key.to_s}.#{APP_CONFIG[:archive_format]}" filepath = job_dir + filename t = (APP_CONFIG[:archive_format] == 'zip') ? 'zip' : 'tar+gzip' if File.exist?(filepath) send_file filepath.to_s, type: t, disposition: "attachment; filename=" + filename.gsub("/", "_") end end end def serve_stderr if readable? @job `rails format_job_stderr[#{@job.key}]` - job_dir = Pathname.new(APP_CONFIG[:data_dir]) + 'users' + @job.user.id.to_s + @job.key + job_dir = Pathname.new(APP_CONFIG[:data_dir]) + 'users' + @job.user_id.to_s + @job.key filename = "stderr.txt" filepath = job_dir + filename if File.exist?(filepath) send_file filepath.to_s, type: 'text', disposition: "attachment; filename=" + filename end end end def read_file_header if params[:file_key] new_data = [] - - user_id = (current_user) ? current_user.id.to_s : "1" - dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + user_id + session[:current_key] + 'input' + @job = Job.where(:key => session[:current_key]).first + dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @job.user_id.to_s + @job.key + 'input' filename = params[:file_key] + ".txt" filepath = dir + filename test = "" if File.exist? filepath lines = [] File.open(filepath, "r") do |f| while(l = f.gets) do lines.push(l.chomp) end end j=0 (0 .. lines.size-1).to_a.each do |i| if !lines[i].match(/^#/) j=i break end end i = (j > 1) ? 
j-1 : 0 headers = lines[i].split("\t") if params[:add_blank] new_data.push({label:"", value:""}); end if headers.size > 0 headers.each do |header| new_data.push({:label => header, :value => header}); end end end render :text => new_data.to_json else render :nothing => true end end def read_file_column if params[:file_key] new_data = [] user_id = (current_user) ? current_user.id.to_s : "1" dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + user_id + session[:current_key] + 'input' filename = params[:file_key] + ".txt" filepath = dir + filename if File.exist? filepath lines = File.readlines(filepath) j=0 (0 .. lines.size-1).each do |i| if !lines[i].match(/^#/) j = i break end end i = (j > 1) ? j-1 : 0 header_els = lines[i].chomp.split("\t") pos_col = 0 (0 .. header_els.size).each do |k| if header_els[k] == params[:col_name] pos_col = k break end end max_val=0 (i .. lines.size-1).each do |j| t = lines[j].split("\t") n = t[pos_col].split(";").size if n > max_val max_val = n end end if max_val > 0 (1 .. max_val+1).each do |j| new_data.push({:label => j, :value => j}); end end end render :text => new_data.to_json else render :nothing => true end end # BUILD THE INDEX OF JOBS FROM CURRENT USER OR ALL (ADMIN ONLY) def index @scope = params[:scope] if current_user if current_user.role == "admin" and @scope == 'global' @jobs = Job.all else @jobs = current_user.jobs.all end end get_total_jobs_size() respond_to do |format| format.html{ if !current_user redirect_to "/welcome" end } end end def get_views @h_views = {} View.all.each do |v| @h_views[v.name]= v end end def get_statuses log_file = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @user.id.to_s + @job.key + 'output' + 'log.json' - #stdout_file = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @user.id.to_s + @job.key + 'output' + 'stdout.log' - #stderr_file = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @user.id.to_s + @job.key + 'output' + 'stderr.log' - - #if File.exist? stdout_file and !File.size(stdout_file) != 0 - # @stdout_log = JSON.parse(File.read(stdout_file)).select {|e| e[0] != nil} - #end - #if File.exist? stderr_file and !File.size(stderr_file) != 0 - # @stderr_log = JSON.parse(File.read(stderr_file)).select {|e| e[0] != nil} - #end @h_statuses = {}#'completed' => 1, 'pending' => 2, 'running' => 3, 'failed' => 4} Status.all.each do |s| @h_statuses[s.name]= s end if File.exist? 
log_file and !File.size(log_file) != 0 @log_json = JSON.parse(File.read(log_file)) @final_json = { :global_status => nil, :status_by_step => {}, :status_by_substep => {}, :global_status_by_step => {}, :messages_by_step => {} } @h_icons={ 'description' => 'fa fa-info-circle', 'description-item' => 'ml-3', 'comment' => 'fa fa-info-circle', 'output' => 'fa fa-file-text-o', 'warning' => 'fa fa-exclamation-triangle icon-warning', 'error' => 'fa fa-exclamation-triangle icon-danger' } @test = "" ### datasets @log_json.select{|e| ['dataset', 'map'].include?(e['type'])}.each do |cat| @final_json[:status_by_step][cat['name']] ||= [] @final_json[:global_status_by_step][cat['name']] ||= nil cat['log'].select{|e| e['type'] != 'file'}.each do |el| @final_json[:status_by_substep][el['name']]=[] step_key = cat['name'] + "_" + el['name'] @final_json[:messages_by_step][step_key] ||= [] tmp_global_status = nil if el['operations'] el['operations'].each do |el2| @final_json[:status_by_substep][el['name']].push({:name => el2['name'], :status => el2['status'], :execution_time => el2['execution_time'], :cat => cat['name']}) @final_json[:messages_by_step][step_key].push({:name => el2['name'], :messages => el2['messages'], :cat => cat['name']}) if !tmp_global_status or (@h_statuses[el2['status']] and @h_statuses[el2['status']].precedence > @h_statuses[tmp_global_status].precedence) tmp_global_status = el2['status'] end end end @final_json[:status_by_step][cat['name']].push({:name => el['name'], :status => (el['status'] || tmp_global_status), :execution_time => el['execution_time']}) if !@final_json[:global_status_by_step][cat['name']] or (@h_statuses[el['status']] and @h_statuses[el['status']].precedence > @h_statuses[@final_json[:global_status_by_step][cat['name']]].precedence) @final_json[:global_status_by_step][cat['name']] = el['status'] end if !@final_json[:global_status] or (@h_statuses[el['status']] and @h_statuses[el['status']].precedence > @h_statuses[@final_json[:global_status]].precedence) @final_json[:global_status] = el['status'] end @final_json[:messages_by_step][step_key].push({:name => el['name'], :messages => el['messages']}) end end ### analyses analyses = @log_json.select{|e| e['type'] == 'analysis'} if analyses.size > 0 analyses.first['log'].each do |el| @final_json[:global_status_by_step][el['name']] ||= nil @final_json[:status_by_step][el['name']] ||= [] @final_json[:messages_by_step][el['name']] ||= [] tmp_global_status = nil if el['levels'] el['levels'].each do |el2| @final_json[:status_by_step][el['name']].push({:name => el2['name'], :status => el2['status'], :execution_time => el2['execution_time']}) @final_json[:messages_by_step][el['name']].push({:name => el2['name'], :messages => el2['messages']}) if !tmp_global_status or (@h_statuses[el2['status']] and @h_statuses[el2['status']].precedence > @h_statuses[tmp_global_status].precedence) tmp_global_status = el2['status'] end end end if !@final_json[:global_status_by_step][el['name']] or (@h_statuses[el['status']] and @h_statuses[el['status']].precedence > @h_statuses[@final_json[:global_status_by_step][cat['name']]].precedence) @final_json[:global_status_by_step][el['name']] = el['status'] end if !@final_json[:global_status] or (@h_statuses[el['status']] and @h_statuses[el['status']].precedence > @h_statuses[@final_json[:global_status]].precedence) @final_json[:global_status] = el['status'] end @final_json[:status_by_step][el['name']].push({:name => el['name'], :status => (el['status'] || tmp_global_status), :execution_time => 
el['execution_time']}) @final_json[:messages_by_step][el['name']].push({:name => el['name'], :messages => el['messages']}) end end @update = 0 @status_vector = [] if @final_json if ['primary_dataset', 'map', 'secondary_dataset'].include? session[:selected_view] if @final_json[:status_by_step][session[:selected_view]] @status_vector += @final_json[:status_by_step][session[:selected_view]].map{|e| @h_statuses[e[:status]].id} @final_json[:status_by_substep].keys.sort.each do |k| @status_vector += @final_json[:status_by_substep][k].select{|e| e[:cat] == session[:selected_view]}.map{|e| @h_statuses[e[:status]].id} end end @final_json[:messages_by_step].keys.sort.each do |k| @status_vector += @final_json[:messages_by_step][k].select{|e| e[:cat] == session[:selected_view] and e[:messages]}.map{|e| e[:messages].size} end else if @final_json[:status_by_step][session[:selected_view]] @status_vector += @final_json[:status_by_step][session[:selected_view]].map{|e| @h_statuses[e[:status]].id} end if @final_json[:messages_by_step][session[:selected_view]] @status_vector += @final_json[:messages_by_step][session[:selected_view]].select{|e| e[:messages]}.map{|e| e[:messages].size} end end end if session[:status_vector] != @status_vector @update = 1 end end ## end file exist end def refresh # get_basic_info() get_statuses() get_job_size() render :partial => "refresh" end def view # @user = (current_user) ? current_user : User.where(:username => 'guest').first get_basic_info() get_statuses() get_job_size() get_views() @form_json = JSON.parse @job.form_json if @job.form_json and !@job.form_json.empty? if !session[:current_level] session[:current_level] = (@form_json['bin_levels'] and @form_json['bin_levels'].size > 0) ? @form_json['bin_levels'][0] : nil end session[:current_level] = params[:current_level].to_i if params[:current_level] @data_json = nil @filename = nil @imagename = nil @description = '' @warning = '' @error = '' @i = 0 @log = '' if !['details', 'primary_dataset', 'secondary_dataset', 'map'].include?(params[:partial]) e = @log_json.select{|el| el['type'] == 'analysis'}.first['log'].select{|el| el['name'] == params[:partial]}.first if e['levels'] #and @form_json['bin_levels'] @i = @form_json['bin_levels'].index(session[:current_level].to_s) @form_json['bin_levels'].each_index do |li| l = @form_json['bin_levels'][li] @log += ">" + l.to_s + ";" if l == session[:current_level].to_s @i = li @log += '!!' end end e2 = e['levels'][@i] @filename = e2['messages'].select{|el| el['output'] and el['output'].match(/#{@h_views[params[:partial]].data_format}$/)}.map{|el| el['output']}.first @imagename = e2['messages'].select{|el| el['output'] and el['output'].match(/pdf|png|jpg$/)}.map{|el| el['output']}.first @description = e2['messages'].select{|el| el['description']}.map{|el| el['description']} @warning = e2['messages'].select{|el| el['warning']}.map{|el| el['warning']} @error = e2['messages'].select{|el| el['error']}.map{|el| el['error']} else @i = -1 @filename = e['messages'].select{|el| el['output'] and el['output'].match(/#{@h_views[params[:partial]].data_format}$/)}.map{|el| el['output']}.first @imagename = e['messages'].select{|el| el['output'] and el['output'].match(/pdf|png|jpg$/)}.map{|el| el['output']}.first @description = e['messages'].select{|el| el['description']}.map{|el| el['description']} @warning = e['messages'].select{|el| el['warning']}.map{|el| el['warning']} @error = e['messages'].select{|el| el['error']}.map{|el| el['error']} end end - #end - if @filename and File.exist? 
@filename @data_json = File.read(@filename) end session[:selected_view] = params[:partial] json_file = Rails.root.join('lib', 'genocrunch_console', 'etc', 'genocrunchlib.json') file_content = File.read(json_file) h = JSON.parse(file_content) @h_form_choices = h['choices'] render :partial => "view_" + params[:partial] end # ALLOW SESSION VARIABLE UPDATE ON SHOW # THIS ALLOW CONDITIONAL RENDERING OF EITHER FIGURES OR EDIT FORM PARTIALS def show - # @user = (current_user) ? current_user : User.where(:username => 'guest').first - # if current_user.role == "admin" and session[:which] == "all" - # @jobs = Job.all - # @users = User.all - # else#if current_user - # @jobs = current_user.jobs.all - # end - # session[:context] = params[:context] - # logger.debug("JOB: " + @job.to_json) - #check_box belongs_to get_basic_info() get_statuses() get_job_size() get_views() @analyses = [] @inputs = [] @h_job_form = JSON.parse(@job.form_json) session[:current_level] = (@h_job_form['bin_levels'] && @h_job_form['bin_levels'][0]) || nil @h_form['fields']['Inputs'].select{|f| f['type'] == 'file'}.each do |f| if @h_job_form[f['id']] @inputs.push({:id => f['id'], :label => f['label'] || f['id'].gsub(/_/, ' ').capitalize}) end end @h_form['fields']['Analysis'].select{|f| f['type'] == 'check_box' and !f['belongs_to']}.each do |f| if @h_job_form[f['id']] #and @h_job_form[f['id']]==true @analyses.push({:id => f['id'], :label => f['label'] || f['id'].gsub(/_/, ' ').capitalize}) end end if !readable? @job redirect_to root_path, notice: 'Cannot access this resource.' end end # ALLOW UPDATE OF INDEX UPON CHANGE def update_index @previous_jobs = params[:previous_jobs] || nil @scope = params[:scope] if current_user.role == "admin" and @scope == 'global' @jobs = Job.all else @jobs = current_user.jobs end get_total_jobs_size() end def get_basic_info json_file = Rails.root.join('lib', 'genocrunch_console', 'etc', 'genocrunchlib.json') file_content = File.read(json_file) @h_form = JSON.parse(file_content) @h_tips = JSON.parse(File.read(Rails.root.join('public', 'app', 'tips.json'))) @h_help = {} @h_field_groups = {} @h_form['fields'].each_key{|card_title| @h_form['fields'][card_title] = @h_form['fields'][card_title].select{|field| field['scope'] != 'cli_only'} @h_form['fields'][card_title].map{|field| @h_help[field['id']] = field['help'] if field['belongs_to'] @h_field_groups[field['belongs_to']]||=[] @h_field_groups[field['belongs_to']].push(field) end } } end # SET A NEW JOB def new @user = (current_user) ? current_user : User.where(:username => 'guest').first - - if @user.storage_quota > 0 and @user.total_jobs_size > @user.storage_quota - flash[:notice] = 'Sorry, the storage quota is exceeded.\nPlease clean up old analysis first.' 
- redirect_to root_url - return - end - @job = @user.jobs.new session[:current_key] = create_key() @job.key = session[:current_key] get_basic_info @default = {} end def set_fields p @missing_fields = [] @present_fields = [] get_basic_info @h_fields ={} @h_form['fields'].each_key{|card_title| @h_form['fields'][card_title].map{|field| @h_fields[field['id']]=field } } @log = '' flag=0 if p flag=1 - ### check if some parameters are not allowed + ### check if some parameters are not allowed p.each_key do |k| if !@h_fields[k] flag = 0 break end end - ### check if all parameters required are submitted + ### check if all parameters required are submitted @h_fields.keys.select{|k| @h_fields[k]['optional'] == false}.each do |k| #logger.debug("EXPLORE field " + k) if (@h_fields[k]['type']== 'file' and ((p[k] and !p[k].blank? ) or !params[:p2][k].empty?) ) or (p[k] and !p[k].empty?) @present_fields.push(@h_fields[k]) else @missing_fields.push(@h_fields[k]) flag = 0 end end end return flag end def create_dirs - user_id = (current_user) ? current_user.id.to_s : "1" - - dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + user_id + dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @job.user_id.to_s Dir.mkdir dir if !File.exist? dir - dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + user_id + @job.key + dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @job.user_id.to_s + @job.key Dir.mkdir dir if !File.exist? dir end def write_files p - user_id = @job.user.id.to_s - #user_id = (current_user) ? current_user.id.to_s : "1" - dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + user_id + @job.key + 'input' + dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @job.user_id.to_s + @job.key + 'input' Dir.mkdir dir if !File.exist? dir form_json = JSON.parse @job.form_json if @job.form_json and !@job.form_json.empty? fields = ['primary_dataset', 'map', 'secondary_dataset'] fields.each do |k| #logger.debug("Key:" + k) filepath = dir + ( k + '.txt') content = (p[k]) ? p[k].read : nil if content and !content.empty? params[:p2][k] = p[k].original_filename File.open(filepath, 'w') do |f| #logger.debug("CONTENT FILE: " + content) f.write content end `dos2unix #{filepath}` `mac2unix #{filepath}` elsif form_json and form_json[k.to_s] params[:p2][k] = form_json[k.to_s]['original_filename'] params[:p][k] = form_json[k.to_s] end end end # CREATE A NEW JOB def create @job = Job.new(:key => params[:tmp_key]) - + + @job.user_id = (current_user) ? current_user.id : 1 + @job.sandbox = (current_user) ? false : true + create_dirs() write_files(params[:p]) @valid_job = set_fields params[:p] @job.name = params[:p][:name] @job.description = params[:p][:description] @job.form_json = params[:p].to_json @default = params[:p] - if current_user - @job.user_id = current_user.id - @job.sandbox = false - end - ## not possible to create a new job with an existing key @valid_job = 0 if !@job.key.empty? 
and Job.where(:key => @job.key).first respond_to do |format| if @valid_job==1 and @job.save @job.delay.perform # run analysis as delayed job session[:agree_with_terms] = true session[:current_key]=create_key() if current_user format.html { redirect_to job_path(@job.key) } format.json { render action: 'new', status: :created, location: @job } else format.html { render action: 'new' } format.json { render json: @job.errors, status: :unprocessable_entity } end end end def edit get_basic_info @default = JSON.parse(@job.form_json) params[:p2]={} ['primary_dataset', 'map', 'secondary_dataset'].select{|k| @default[k]}.each do |k| params[:p2][k] = @default[k]['original_filename'] end end # PATCH/PUT /jobs/1 # PATCH/PUT /jobs/1.json def update @update_only = params[:job][:update_only] if params[:job] if @update_only @job.update_attributes(@update_only.to_sym => params[:job][@update_only.to_sym]) # Update form_json form_json = JSON.parse(@job.form_json) form_json[@update_only] = params[:job][@update_only.to_sym] @job.update_attributes(:form_json => form_json.to_json) # Update params.json files data_dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @job.user_id.to_s + @job.key.to_s for p in ['input', 'output'] do p_fp = data_dir + p + "params.json" if File.exist?(p_fp) p_content = JSON.parse(File.read(p_fp)) p_content[@update_only] = params[:job][@update_only.to_sym] File.open(p_fp, 'w') do |f| f.write p_content.to_json end end end else write_files(params[:p]) valid_job = set_fields params[:p] h_job = { :name => params[:p][:name], :description => params[:p][:description], :form_json => params[:p].to_json } @default = JSON.parse(@job.form_json) @default = params[:p] respond_to do |format| if valid_job==1 and @job.update_attributes(h_job) kill_job(@job) @job.delay.perform # run analysis as delayed job session[:agree_with_terms] = true format.html {redirect_to job_path(@job.key)} format.json { head :no_content } else format.html { render action: 'edit'} format.json { render json: @job.errors, status: :unprocessable_entity } end end end end # DELETE /jobs/1 # DELETE /jobs/1.json def destroy job_dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @job.user_id.to_s + @job.key.to_s FileUtils.rm_rf(job_dir) @job.destroy respond_to do |format| format.html { redirect_to jobs_url } format.json { head :no_content } end end private def get_job_size data_dir = Pathname.new(APP_CONFIG[:data_dir]) + "users" + @job.user_id.to_s + @job.key.to_s js = `du -sb #{data_dir} | cut -f1`.to_i if js != @job.size.to_i @job.update_column(:size, js) end end def get_total_jobs_size if current_user @total_jobs_size = current_user.jobs.all.sum(&:size).to_i if @total_jobs_size != current_user.total_jobs_size.to_i current_user.update_column(:total_jobs_size, @total_jobs_size) end end end # Use callbacks to share common setup or constraints between actions. def set_job -# if current_user.role == "admin" -# @job = Job.all.find(params[:key]) -# else -# @job = current_user.jobs.find(params[:key]) -# end @job = Job.where(:key => params[:key] || params[:tmp_key]).first - session[:current_key] = @job.key if action_name != 'clone' #if current_user + session[:current_key] = @job.key if action_name != 'clone' @user = @job.user #logger.debug("JOB: " + @job.to_json) - # @job = nil if !readable? @job end # Never trust parameters from the scary internet, only allow the white list through. 
def job_params params.require(:job).permit() end end diff --git a/app/views/home/_doc.html.erb b/app/views/home/_doc.html.erb index 72aa511..d1e0766 100644 --- a/app/views/home/_doc.html.erb +++ b/app/views/home/_doc.html.erb @@ -1,515 +1,515 @@

Documentation

Version 1.3 / 04.05.2018

Overview

Genocrunch is a web-based data analysis platform dedicated to metagenomics and metataxonomics. It is tailored to process datasets derived from high-throughput nucleic acid sequencing of microbial communities <%= image_tag('genocrunch_pipeline-1.png', height:'14px') %>, such as gene counts or counts of operational taxonomic units (OTUs) <%= image_tag('genocrunch_pipeline-2.png', height:'14px') %>. In addition to such a primary dataset, it also allows the integration of a secondary dataset (e.g. metabolite levels) <%= image_tag('genocrunch_pipeline-3.png', height:'14px') %>.
Genocrunch provides tools covering data pre-processing and transformation, diversity analysis, multivariate statistics, dimensionality reduction, differential analysis, clustering, as well as similarity network analysis and associative analysis. The results of clustering are automatically included as additional models in other analyses <%= image_tag('genocrunch_pipeline-4.png', height:'14px') %>.
Interactive visualization is offered for all figures.

- <%= image_tag('genocrunch_pipeline.png', width:'100%') %> + <%= image_tag('genocrunch_pipeline.png', class: 'pipeline-figure', alt: 'Genocrunch pipeline') %>

New users and registration

Running an analysis session does not require any registration; however, managing multiple analyses (recovering/editing previous work) is done through a personal index that requires users to sign in. Before signing in, new users need to register.

Registering

Registering is done via the Register button of the top-bar. The registration process requires new users to choose a password. The email address is only used to recover forgotten passwords. <% if APP_CONFIG[:user_confirmable] %> In order to complete the registration process, new users need to follow a confirmation link which is automatically sent to the provided email address. In case of issues, this confirmation link can be resent to the provided email address via the Resend confirmation instruction link on the welcome page. <% end %>

Signing in

Signing in is done from the welcome page, which is accessible via the Sign In button of the top-bar. Before signing in, new users need to complete the registration process.

Trying Genocrunch

The best way to try Genocrunch is to load an example and edit it.

Examples

Examples of analyses are provided via the Examples link of the top-bar. Example data can be loaded into a new analysis session via the Load button. This makes it possible to visualize and edit Examples without restriction. For signed-in users, it will also copy the example into their personal analysis index.

Analyzing data

Running a new analysis

New analyses can be submitted via the New Analysis button of the top-bar. If the user is signed out, the analysis is associated with the browser session and remains available as long as the browser is not closed. If the user is signed in, the analysis is stored in their personal index. Analysis parameters are set by filling in a single form comprising four parts: Inputs, Pre-processing, Transformation and Analysis. Analyses are then submitted via the Start Analysis button located at the bottom of the form. See the Inputs and Tools section below for details.
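
The difference between anonymous and signed-in sessions is also visible in the job creation logic of this diff. The following is a condensed sketch paraphrasing JobsController#create as changed in jobs_controller.rb (the guest user id 1 and the sandbox flag come from that code; everything else is illustrative):

# Condensed from JobsController#create in this diff:
@job = Job.new(key: params[:tmp_key])
@job.user_id = current_user ? current_user.id : 1  # anonymous jobs belong to the guest user (id 1)
@job.sandbox = current_user ? false : true         # ...and are flagged as sandbox (session-bound) jobs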

Editing an analysis

Analyses can be edited via the associated Edit button present on the analysis page and via the edit icons () of the analysis index (for signed-in users only). Editing is performed by updating the analysis form and submitting the modifications via the Restart Analysis button located at the bottom of the form.

Cloning an analysis

Analyses can be copied via the copy icons () of the analysis index (for signed-in users only).

Deleting an analysis

Analyses can be deleted via the delete icons () of the analysis index (for signed-in users only). This process is irreversible.

Inputs and Tools

Analyses are configured via a single form composed of four sections: Inputs, Pre-processing, Transformation and Analysis. These sections and their associated parameters are described below.

Inputs

The Inputs section of the analysis creation form is used to upload data files.

Pre-processing

The Pre-processing section of the analysis creation form specifies modifications that will be applied to the primary dataset prior to analysis.

Transformation

The Transformation section of the analysis creation form proposes additional modifications for both the primary and secondary datasets.

Analysis

The Analysis section of the analysis creation form sets which statistics to apply and which figures to generate.

Versions

Genocrunch data analysis tools are based on R libraries. The libraries used are mentioned in the analysis report. The versions of these libraries are available on the Versions page, via the Infos menu of the top-bar.

Analysis report

A detailed analysis report is generated for each analysis. It is available directly after creating/updating an analysis via the My Analysis button of the top-bar or via the corresponding icon () in the analysis index (for signed-in users only). The analysis report includes detailed descriptions of the tools and methods used, possible warning and error logs, as well as intermediate files, final files and interactive figures as they become available. Information on pre-processing and data transformation is available for the primary dataset and the optional secondary dataset in their respective sections. Figures and descriptions for each analysis are available in separate sections. If multiple binning levels were specified, figures for each level can be accessed via the associated Level button.

Exporting figures

Figures can be exported via the associated Export menu. All figures can be exported as vector images in the SVG format. Some figures are also available in the PDF format, but PDFs are generated at the same time as the figure raw data and cannot be edited with the interactive visualization tools. Raw figure data are also available in the JSON format, the TXT format, or both. Figure legends are currently only available in the HTML format.

Archive

Files containing initial, intermediate and final data, as well as the analysis log and logs for standard output and standard error, are automatically included in a .<%= APP_CONFIG[:archive_format] %> archive. This archive can be downloaded via the button of the analysis report page or via the corresponding icon () in the analysis index (for signed-in users only). Although some figures may be automatically included in the archive in the form of a minimal PDF file, the archive generally does not contain figure images. These should be exported separately via the Export menu.
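
On the server side, the archive is rebuilt on request before being streamed back to the browser. The sketch below is condensed from the serve_archive action in jobs_controller.rb as it appears in this diff:

# Condensed from JobsController#serve_archive in this diff:
`rails make_job_archive[#{@job.key}]`   # (re)build the archive for this job
job_dir  = Pathname.new(APP_CONFIG[:data_dir]) + 'users' + @job.user_id.to_s + @job.key
filename = "#{@job.key}.#{APP_CONFIG[:archive_format]}"
type     = APP_CONFIG[:archive_format] == 'zip' ? 'zip' : 'tar+gzip'
filepath = job_dir + filename
if File.exist?(filepath)
  send_file filepath.to_s, type: type, disposition: "attachment; filename=" + filename.gsub("/", "_")
end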

Bug report

During computation (while the analysis is running), the standard error (stderr) stream is redirected into a bug report that can be downloaded using the link located at the bottom-right of the details page in the analysis report.

Usage limits

Time limits

By default, analyses are kept for <%= APP_CONFIG[:max_sandbox_job_age] %> days following their last update; analyses owned by registered users are kept for <%= APP_CONFIG[:max_job_age] %> days following their last update. Old analyses are automatically deleted from the server once they reach these limits.
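
The cleanup task itself is not part of this diff; assuming a scheduled task uses the two APP_CONFIG keys above together with the job's sandbox flag and last update time, the age check could look roughly like this (hypothetical sketch, not the actual implementation):

# Hypothetical sketch -- the actual cleanup task is not shown in this diff.
Job.find_each do |job|
  max_age = job.sandbox ? APP_CONFIG[:max_sandbox_job_age] : APP_CONFIG[:max_job_age]
  job.destroy if job.updated_at < max_age.to_i.days.ago
end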

Storage limits

The storage quota is set to <%= number_to_human_size(APP_CONFIG[:max_storage].to_i, precision: 2, prefix: :si) %> per user.
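
Enforcement happens server-side: the before_action extended in this diff redirects to the root page when the quota is exceeded. Condensed from JobsController#check_storage_limit in jobs_controller.rb:

# Condensed from JobsController#check_storage_limit (this diff); a quota of 0 means unlimited.
if current_user && current_user.storage_quota > 0 &&
   current_user.total_jobs_size > current_user.storage_quota
  flash[:notice] = "Sorry but you exceeded your storage quota. Please clean up old analyses first."
  redirect_to root_url
end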

diff --git a/app/views/jobs/_form.html.erb b/app/views/jobs/_form.html.erb index 55dbadf..216e871 100644 --- a/app/views/jobs/_form.html.erb +++ b/app/views/jobs/_form.html.erb @@ -1,83 +1,82 @@ -<%= form_for(@job, :url => ((action_name == 'edit') ? job_path(@job.key) : jobs_path), :html => {:multipart => true}) do |f| %> - +<%= form_for(@job, :url => ((['edit', 'update'].include?(action_name)) ? job_path(@job.key) : jobs_path), :html => {:multipart => true}) do |f| %> <% ['primary_dataset', 'map', 'secondary_dataset'].each do |k| %> <%= hidden_field_tag "p2[" + k + "]", (params[:p2] && params[:p2][k]) || '' %> <% end %> <%= hidden_field_tag "default_bin_levels", @default['bin_levels'].to_json %> <%= hidden_field_tag "url_read_file_column", read_file_column_jobs_path() %> <%= hidden_field_tag 'tmp_key', @job.key %>
<% @h_form['fields'].each_key do |card_title| %>

<%= card_title %> <% if card_title == 'Inputs' %> <% l = ['', ''] %> <% end %>

<%= render :partial => 'card_form', :locals => {:list_fields => @h_form['fields'][card_title].reject{|e| e['belongs_to']}} %>
<% end %>
<% type_job = (action_name == 'edit') ? 'Restart' : 'Start' %> <% if !current_user and !session[:agree_with_terms] %> <%= f.submit type_job + " Analysis", data: { confirm: "Before using Genocrunch, you must agree with the following Terms of Service:\n\n"+File.read('public/app/TERMS_OF_SERVICE.txt') }, :class => "btn btn-success btn-lg col-md-12 mt-1" %> <% else %> <%= f.submit type_job + " Analysis", :class => "btn btn-success btn-lg full-width" %> <% end %>
<%= javascript_tag do %> $(document).ready(function() { var l = ['category_column', 'bin_levels', 'basic_model']; for (var i =0; i< l.length; i++){ $("#p_" + l[i]).change(function(){ if ($("#p_" + l[i] + " option").length == 0) $("#p_" + l[i] + "-container").addClass("hidden"); }); } $("#p_category_column").change(function(){ var ori_filename = $("#p2_primary_dataset").val(); var url = (ori_filename != '') ? "<%= read_file_column_jobs_path() + '?file_key=primary_dataset' %>" : null; var val = <%= raw (@default["bin_levels"]) ? @default["bin_levels"].to_json : "[]" %> update_bin_levels($(this).val(), val, url) }); $("select.belongs_to").change(function(){ var d = $(this).parent().parent().parent().parent().parent().parent().children().filter('.card-header').first().children().filter('.form-check').first().children().filter('.form-check-label').first().children().filter('.form-check-input').first(); d.prop("checked", "checked"); }); $(".belongs_to").keyup(function(){ var d = $(this).parent().parent().parent().parent().children().filter('.card-header').first().children().filter('.form-check').first().children().filter('.form-check-label').first().children().filter('.form-check-input').first(); d.prop("checked", "checked"); }); }); <% end %> <% end %> diff --git a/app/views/layouts/application.html.erb b/app/views/layouts/application.html.erb index 3d5ac7c..3888ed1 100644 --- a/app/views/layouts/application.html.erb +++ b/app/views/layouts/application.html.erb @@ -1,160 +1,160 @@ <%= Rails.application.class.parent_name %> <%= stylesheet_link_tag "application", media: "all", "data-turbolinks-track" => true %> <%= javascript_include_tag "application", "data-turbolinks-track" => true %> <%= csrf_meta_tags %>
<%= yield %>
diff --git a/app/views/users/sessions/_infos.html.erb b/app/views/users/sessions/_infos.html.erb index ec895e2..b02b08b 100644 --- a/app/views/users/sessions/_infos.html.erb +++ b/app/views/users/sessions/_infos.html.erb @@ -1,50 +1,52 @@
<%# PIPELINE %> + +
+ <%= image_tag('genocrunch_pipeline.png', class: 'pipeline-figure', alt: 'Genocrunch pipeline') %> +

Genocrunch is a web-based data analysis platform dedicated to metagenomics and metataxonomics. It is tailored to process count datasets derived from high-throughput nucleic acid sequencing of microbial communities <%= image_tag('genocrunch_pipeline-1.png', height:'14px') %>, such as gene counts or counts of operational taxonomic units (OTUs) <%= image_tag('genocrunch_pipeline-2.png', height:'14px') %>. In addition to such a primary dataset, it also allows the integration of a secondary dataset (e.g. metabolite levels) <%= image_tag('genocrunch_pipeline-3.png', height:'14px') %>.

Genocrunch provides tools covering data pre-processing and transformation, diversity analysis, multivariate statistics, dimensionality reduction, differential analysis, clustering, as well as similarity network analysis and associative analysis. The results of clustering are automatically included as additional models in other analyses <%= image_tag('genocrunch_pipeline-4.png', height:'14px') %>.

Interactive visualization is offered for all figures.

-
- <%= image_tag('genocrunch_pipeline.png', width:'100%', alt: 'Genocrunch pipeline') %> -
+
diff --git a/lib/genocrunch_console/lib/genocrunchlib.R b/lib/genocrunch_console/lib/genocrunchlib.R index b78eeba..2261033 100755 --- a/lib/genocrunch_console/lib/genocrunchlib.R +++ b/lib/genocrunch_console/lib/genocrunchlib.R @@ -1,2906 +1,2911 @@ #genocrunchlib.R ################ # Print message in a formatted way so it can be decomposed into categories of # logs. Can also be used to issue errors # Args: # (1) label (character) A label for the message (ex: title, log, warning...) # (2) text (character) Message to print # (3) verbose (character) A statement: 'TRUE' or same value as 'label':print # to stdout; 'FALSE':don't print; # Prints to stdout: # Tab-separated: Path to executed script, date and time, 'label' and 'text' ################ log_fp <- NULL PrintMsg <- function(text='default message', verbose=TRUE, stop=FALSE, log=log_fp) { if (!is.null(log)) { f <- file(log, open="at") sink(file=f, append=TRUE) sink(type="message") } if (as.logical(verbose) == TRUE) { cat(text) } if (!is.null(log)) { sink() sink(type="message") close(f) } if (as.logical(stop) == TRUE) { stop(text) } } ################ # Write a table to an output location, with the option of having indented # columns name if 'name'=''. # Args: # (1) table Table object # (2) output.fp Path to the output file # (3) name Name of first column # (4) sep Separator character # (5) na na # (6) dec dec # (7) quote quote # Outputs: # Writes 'table' to 'output.fp', starting first line with an indentation ('\t') ################ WriteTable <- function(table=NULL, output.fp='', name='name', sep='\t', na='', dec='.', quote=FALSE) { write.table(t(c(name, names(table))), file=output.fp, append=FALSE, row.names=FALSE, col.names=FALSE, sep=sep, na=na, dec=dec, quote=quote) write.table(table, file=output.fp, append=TRUE, row.names=TRUE, col.names=FALSE, sep=sep, na=na, dec=dec, quote=quote) } ################ # Reads the "Rfunctions" section of a json file as a json string # Args: # (1) json Path to the json file # Outputs: # A json string as found in the "Rfunctions" section of the json file ################ ReadJson <- function(file='/../etc/genocrunchlib.json', rel=TRUE) { suppressMessages(library('rjson')) if (rel) { # Consider file as a relative path with respect to this library args <- commandArgs(trailingOnly=FALSE) dir <- dirname(sub('--file=', '', args[grep('--file=', args)])) file <- normalizePath(paste(dir, file, sep=''), mustWork=TRUE) } if (!file.exists(file)) { PrintMsg(paste('"error":"File not found: ', file, '."', sep=''), verbose=TRUE, stop=TRUE) } json_str <- fromJSON(file=file) return (toJSON(json_str$choices)) } ################################################################################ ################################################################################ # MANIPULATION ################################################################# # MANIPULATION ################################################################# # MANIPULATION ################################################################# # MANIPULATION ################################################################# ################################################################################ ################################################################################ ################ # Convert a data frame of factors to a numeric data frame. Unlike data.matrix, # it will force characters to numeric. 
# Args: # (1) x A data frame # Returns: # A numeric version of 'x' ################ ForceNumericDataFrame <- function(x) { x <- as.matrix(x) for (i in 1:ncol(x)) { x[, i] <- as.numeric(as.vector(unlist(x[, i]))) } return (as.data.frame(x)) } ################ # Convert values of a numeric vector to percents of its sum # Args: # (1) x A numeric vector # Returns: # A percent version of 'x' ################ Numeric2Percent <- function(x){ return (100*x/sum(x)) } ################ # Scale a numeric vector on a range of values # Args: # (1) x A numeric vector # (2) range A numeric vector containing 2 elements # (3) force A logical indicating wheter infinite values should be forced into # 'range' # Returns: # A scaled version of 'x', within 'range' ################ RangeScale <- function(x=NULL, range=c(0, 1), force=TRUE) { scaled <- range[1]*(1-((x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))))+range[2]*((x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))) if (force == TRUE) { scaled[scaled == -Inf] <- min(range) scaled[scaled == Inf] <- max(range) } return (scaled) } ################ # Scale by standard deviation and center on mean # Args: # (1) x A numeric vector # Returns: # A scaled version of 'x' ################ cScale <- function(x=NULL) { sd <- sd(x, na.rm=TRUE) mean <- mean(x, na.rm = TRUE) if (is.na(sd)) { sd <- 1 } else if (sd == 0) { sd <- 1 } if (is.na(mean)) { mean <- 0 } return (x/sd-mean) } ################################################################################ ################################################################################ # JSON HANDELING ############################################################### # JSON HANDELING ############################################################### # JSON HANDELING ############################################################### # JSON HANDELING ############################################################### ################################################################################ ################################################################################ ################ # Build a json string to store figure data # Args: # (1) data (data-frame) # (2) xmetadata (data-frame) With nrow=ncol(data) # Returns: # A json string ################ Dataframe2DataJson <- function(data=NULL, xmetadata=NULL){ # Rules: # colname(data) -> [{'name'}] # rowname(data) and data -> [{'data':['rowname(data)':'data']}] # xmetadata -> [{'rowname(xmetadata)'}] json <- c(1:ncol(data)) for (i in 1:ncol(data)) { xmetadata.json <- NULL if (!is.null(xmetadata)) { xmetadata.json <- c(1:ncol(xmetadata)) for (j in 1:ncol(xmetadata)) { xmetadata.json[j] <- paste('"', names(xmetadata)[j], '":"', xmetadata[i, j], '"', sep='') } xmetadata.json <- paste(xmetadata.json, collapse=',') } data.json.el <- c(1:nrow(data)) for (j in 1:nrow(data)) { data.json.el[j] <- paste('"', row.names(data)[j], '":"', data[j, i], '"', sep='') } data.json <- paste('{', paste(data.json.el, collapse=','), '}',sep='') json[i] <- paste('{', paste(c(paste('"name":"', names(data)[i], '"', sep=''), xmetadata.json, paste('"data":', data.json, sep='')), collapse=','), '}', sep='') } return (paste('[', paste(json, collapse=','), ']', sep='')) } ################ # Build a json string to store diversity values # Args: # (1) diversity (list) # (2) p.value (list) # (3) map (matrix) # Returns: # A json string ################ diversity2json <- function(diversity=NULL, p.value=NULL, map=NULL){ rar <- row.names(diversity) nsample <- ncol(diversity) sample <- 
c(1:nsample) for (j in 1:nsample) { ndata <- nrow(diversity) data <- c(1:ndata) for (k in 1:ndata) { data[k] <- paste('"', rar[k], '":', diversity[k, j], sep='') } metadata <- c(1:ncol(map)) for (k in 1:ncol(map)) { metadata[k] <- paste('"', names(map[k]), '":"', map[j, k], '"', sep='') } sample[j] <- paste('{"name":"', row.names(map)[j], '",', paste(metadata, collapse=','), ',"data":{', paste(data, collapse=','), '}}', sep='') } if (!is.null(p.value)) { stat <- c(1:length(p.value)) for (j in 1:length(p.value)) { s <- c(1:length(p.value[[j]])) for (k in 1:length(p.value[[j]])) { s[k] <- paste('{"name":"', names(p.value[[j]])[k], '", "p-value":"', p.value[[j]][1, k], '"}', sep='') } stat[j] <- paste(s, collapse=',') } json <- paste('{"data":[', paste(sample, collapse=','), '],"stat":[', paste(stat, collapse=','), ']}', sep='') } else { json <- paste('{"data":[', paste(sample, collapse=','), ']}', sep='') } return (json) } ################ # Build a json tree from dendrogram # Args: # (1) dendrogram (list) A dendrogram (nested lists) # Returns: # A json string ################ dendrogram2json <- function(dendrogram=NULL, json=NULL, it=0) { start <- FALSE if (is.null(json)) { start <- TRUE json <- paste('{"name":"node', it, '", "children":[', sep='') } json.vect <- c(1:length(dendrogram)) for (i in 1:length(dendrogram)) { if (length(dendrogram[[i]]) > 1) { it <- it+1 json.vect[i] <- paste('{"name":"node', it, '", "children":[', sep='') dendrogram2json.out <- dendrogram2json(dendrogram[[i]], json.vect[i], it) json.vect[i] <- dendrogram2json.out[[1]] it <- dendrogram2json.out[[2]] json.vect[i] <- paste(json.vect[i], ']}', sep='') } else { json.vect[i] <- paste('{"name":"', dendrogram[[i]], '"}', sep='') } } if (start) { json <- paste(json, paste(json.vect, collapse=','), ']}', sep='') } else { json <- paste(json, paste(json.vect, collapse=','), sep='') } out <- list() out[1] <- json out[2] <- it return (out) } ################ # Build a json network from similarity matrix # Args: # (1) mat list of numeric matrix # Returns: # A json string ################ buildFusionNetworkJson <- function(mat=NULL, map=NULL){ names <- colnames(mat[[1]]) ncol <- ncol(mat[[1]]) if (!is.null(map)) { ncol.map <- ncol(map) } nodes <- c(1:ncol) for (i in 1:ncol){ if (!is.null(map)) { map.row <- c(1:ncol.map) for (j in 1:ncol.map) { map.row[j] <- paste('"', names(map)[j], '":"', map[i, j], '"', sep='') } } else { map.row <- '"none":"none"' } nodes[i] <- paste('{"id":', i-1, ',"name":"', names[i], '",', paste(map.row, collapse=','), '}', sep='') } links <- c(1:(ncol*(ncol/2-1))) k <- 1 for (i in 1:(ncol-1)) { for (j in (i+1):ncol) { weights <- c(1:length(mat)) for (l in 1:length(mat)) { weights[l] <- mat[[l]][i,j] } links[k] <- paste('{"source":', i-1, ',"target":', j-1, ',"weight":[', paste(weights,collapse = ","), ']}', sep='') k <- k+1 } } return(paste('{"nodes":[', paste(nodes, collapse = ","),'],"links":[', paste(links, collapse = ","), ']}', sep='')) } ################################################################################ ################################################################################ # MODIFICATION ################################################################# # MODIFICATION ################################################################# # MODIFICATION ################################################################# # MODIFICATION ################################################################# ################################################################################ 
################################################################################ ################ # Sort a data-frame columns according to a column of another # Args: # (1) table (matrix-like object) Table to be sorted # (2) map (matrix-like object) Map to extract the sorting column from # (3) column (numeric) Index of the column in map to use for sorting # (4) verbose (bool) Print messages? see PrintMsg() options # Returns: # A sorted version of table # Print to stdout: # A description ################ SortTable <- function(table=NULL, map=NULL, verbose=TRUE) { # print(row.names(map)) # print(map) # print(map[, column]) PrintMsg('"description":"The table columns were sorted according to the map."', verbose) return(as.data.frame(table[, as.vector(row.names(map))])) } ################ # Transpose a data frame (or a matrix-like object) # Args: # (1) x (matrix-like object) # Returns: # A transposed version of 'x' (numeric dataframe) ################ Transpose <- function(x=NULL) { if (is.data.frame(x)) { names <- names(x) row.names <- row.names(x) x <- as.data.frame(t(x)) names(x) <- row.names row.names(x) <- names } else { x <- t(x) } return (x) } ################ # Filter numeric table (with category-specific options for OTU-type tables) # Args: # (1) table (numeric dataframe) Count table # (2) category (character) Name of the (optional) category column # (3) abundance_threshold (numeric) Min % abundance per column [0,100] # (4) presence_threshold (numeric) Min % presence per row [0,100] # (5) rm_unassigned (logical) Remove unassigned category ? # (6) verbose (character) Print messages? see PrintMsg() options # Returns: # An filtered version of 'table' (numeric dataframe) # Prints to stdout: # A description of the function ################ FilterTable <- function(table=NULL, category='taxonomy', abundance_threshold=0.03, abundance_threshold_type='percent', presence_threshold=1, presence_threshold_type='int', verbose=TRUE) { nrow.init <- nrow(table) # First filter based on minimal abundance msg <- paste('"description":"Removed rows not exceeding ', abundance_threshold, sep='') filter <- table[, names(table) != category] minmax.a <- paste(min(filter), max(filter), sep='-') if (abundance_threshold_type == 'percent') { filter <- as.data.frame(apply(filter, 2, Numeric2Percent)) msg <- paste(msg, '%', sep='') minmax.a <- paste(min(filter), '-', max(filter), '%', sep='') } max.abundance <- apply(filter, 1, max) table.filtered <- table[max.abundance >= abundance_threshold, ] nrow.filtered.a <- nrow(table.filtered) msg <- paste(msg, ' in at least one column: ', nrow.init-nrow.filtered.a, ' rows were removed out of ', nrow.init, '. Then, removed rows with presence (number of values not equal to zero) lower than ', presence_threshold, sep='') # Then filter based on minimal presence filter <- table.filtered[, names(table.filtered) != category] presence <- rowSums(filter != 0) minmax.p <- paste(min(presence), max(presence), sep='-') if (presence_threshold_type == 'percent') { presence <- Numeric2Percent(presence) msg <- paste(msg, '%', sep='') minmax.p <- paste(min(presence), '-', max(presence), '%', sep='') } table.filtered <- table.filtered[presence >= presence_threshold, ] nrow.filtered.p <- nrow(table.filtered) nstop <- nrow(table.filtered) ab.min <- round(min(filter), digits=4) ab.max <- round(max.abundance, digits=4) msg <- paste(msg, ': ', nrow.filtered.a-nrow.filtered.p, ' rows were removed out of ', nrow.filtered.a, ' (final number of rows = ', nrow.filtered.p, '). 
Min-max abundance found was ', minmax.a, '. Min-max presence found was ', minmax.p, '."', sep='') PrintMsg(msg, verbose) if (nrow.filtered.p == 0) { PrintMsg(paste('"error":"All the rows were filtered out. Please chose an abundance threshold within ', minmax.a, ' and a presence threshold within ', minmax.p, '."', sep=''), verbose, TRUE) } return (table.filtered) } ################ # Bin numeric table by category # Args: # (1) table (numeric dataframe) Count table # (2) aggr.col (character) Name of the column in 'table' to consider for data # aggregation # (3) fun (character) Function to use for data aggregation. Valid # choices: 'mean', 'sum' # (4) verbose (character) Print messages? see PrintMsg() options # Returns: # An aggregated version of 'table' (numeric dataframe) # Prints to stdout: # A description of the function ################ BinTableByCategory <- function(table=NULL, category_col='taxonomy', fun='sum', level='NA', verbose=TRUE, vstyle=1) { if ((category_col == '') || !(category_col %in% names(table)) ) { category_col <- "row names" category <- row.names(table) } else { category <- paste(table[, names(table) == category_col], '; (', row.names(table), ')', sep='') table <- table[, names(table) != category_col] } category <- strsplit(category, ';') if (vstyle == 1) { PrintMsg(paste('"description":"Observations were binned by categories (category column:', category_col, ', function:', fun, ', level:', level, ')."', sep=''), verbose) } else if (vstyle == 2) { PrintMsg(paste('"description":"Observations were binned by categories (category column:', category_col, ', function:', fun, '):"', sep=''), verbose) } if (vstyle > 1) { PrintMsg(paste('"description-item":"Category level ', level, '"', sep=''), verbose) } for (i in 1:length(category)) { category[[i]] <- gsub('^ *.__ *$', '', category[[i]]) category[[i]] <- gsub('^ *', '', category[[i]]) category[[i]] <- gsub(' *$', '', category[[i]]) category[[i]] <- category[[i]][! category[[i]] %in% c('', ' ')] if (length(category[[i]]) >= level) { category[[i]] <- paste(category[[i]][1:level], collapse='; ') } else { category[[i]] <- paste(category[[i]], collapse='; ') } } category <- list(unlist(category)) name <- 'category' while (name %in% names(table)) { name <- paste(name,paste(sample(c(0:9, letters, LETTERS), 3), collapse=''),sep='_') } names(category) <- name table.binned <- aggregate(table, by=category, FUN=fun) row.names(table.binned) <- table.binned[, name] table.binned <- table.binned[, names(table.binned) != name] return(table.binned) } ################ # Divide a data frame (or a matrix-like object) by a vector # Args: # (1) table (numeric dataframe) Count table # (2) vect (vector) Vector to divide 'table' # (3) verbose (character) Print messages? see PrintMsg() options # Returns: # A divided (per row) version of 'table' (numeric dataframe) # Prints to stdout: # A description of the function ################ DivTable <- function(table=NULL, vect=NULL, verbose='TRUE') { PrintMsg('log', 'Divide tables rows by a vector.', verbose=verbose) return (as.matrix(table) / unlist(vect)) } ################ # Transform to log2 count per milion # Args: # (1) table (numeric dataframe) Count table # (2) verbose (character) Print messages? 
see PrintMsg() options # Returns: # A transformed version of 'table' (numeric dataframe) # Prints to stdout: # A description of the function ################ ApplyLog2Cpm <- function(table=NULL, verbose='TRUE') { if (length(table[table < 0]) > 0) { PrintMsg('"error":"The log2cpm transformation was not applied because negative values were detected."', verbose, TRUE) return () } if (length(table[table%%1 > 0]) > 0) { PrintMsg('"warning":"Non-integer values were detected."', verbose) table <- round(table, digits=0) } PrintMsg('"description":"Values were converted into per-million per column and a log-transformation was applied (log2(x+1))."', verbose) transformed.table <- as.data.frame(log2(1+(apply(as.matrix(table), 2, Numeric2Percent))*10000)) names(transformed.table) <- names(table) row.names(transformed.table) <- row.names(table) return (transformed.table) } ################ # Transform to log2 # Args: # (1) table (numeric dataframe) Count table # (2) verbose (character) Print messages? see PrintMsg() options # Returns: # A transformed version of 'table' (numeric dataframe) # Prints to stdout: # A description of the function ################ ApplyLog2 <- function(table=NULL, verbose=TRUE) { if (length(table[table < 0]) > 0) { PrintMsg('"error":"log2(x+1) was not applied because negative values were detected."', verbose, TRUE) return () } PrintMsg('"description":"A log-transformation was applied (log2(x+1))."', verbose) transformed.table <- as.data.frame(log2(1+(as.matrix(table)))) names(transformed.table) <- names(table) row.names(transformed.table) <- row.names(table) return (transformed.table) } ################ # Transform to percent (per column) # Args: # (1) table (numeric dataframe) Count table # (2) verbose (character) Print messages? see PrintMsg() options # Returns: # A transformed version of 'table' (numeric dataframe) # Prints to stdout: # A description of the function ################ ApplyCount2Percent <- function(table, verbose=TRUE) { PrintMsg('"description":"Values were converted into percentages per column."', verbose=verbose) transformed.table <- as.data.frame(apply(as.matrix(table), 2, Numeric2Percent)) names(transformed.table) <- names(table) row.names(transformed.table) <- row.names(table) return (transformed.table) } ################ # Apply rarefaction # Args: # (1) table (numeric dataframe) Count table # (2) sample (positive integer or 'max') Sampling depth # (3) nsampling (positive integer) Number of repeated sampling to performe # (4) verbose (character) Print messages? see PrintMsg() options # Returns: # A rarefied version of 'table' (a numeric dataframe) # Prints to stdout: # A description of the function ################ RarefyTable <- function(table=NULL, sample='max', nsampling=1, verbose=TRUE) { suppressMessages(library('vegan')) # Round counts if not integers if (length(table[table < 0]) > 0) { PrintMsg('"error":"Rarefaction is not applicable because negative values were detected."', verbose, TRUE) return () } if (length(table[table%%1 > 0]) > 0) { PrintMsg('"warning":"Non-integer values were detected. 
In order to permit rarefaction, these values were rounded to the nearest integer."', verbose) table <- round(table, digits=0) } # Define/check rarefaction depth min.sample <- min(colSums(table)) if (sample == 'max' || as.numeric(sample) > min.sample) { sample <- min.sample } if (sample < 100) { PrintMsg(paste('"warning":"Very low counts detected in the following sample(s): ', paste(names(table)[which(colSums(table) < 100)], collapse=','), '."', sep=''), verbose) } else if (sample < 1000) { PrintMsg(paste('"warning":"Low counts detected in the following sample(s): ', paste(names(table)[which(colSums(table) < 1000)], collapse=','), '."', sep=''), verbose) } # Apply rarefaction table <- t(table) table.list <- list() for (i in 1:as.numeric(nsampling)) { table.list[[i]] <- rrarefy(table, sample=sample) } rarefied.table <- apply(simplify2array(table.list), c(1, 2), mean) # element-wise mean across rarefied tables PrintMsg(paste('"description":"The number of observations per samples were equalized using the rarefaction method with a sampling depth of ', sample, ' (random sampling without replacement) (R package vegan). The rarefied values were calculated as the means of ', nsampling, ' independent samplings that were rounded to the nearest integer."', sep=''), verbose=verbose) return (data.frame(t(round(rarefied.table, digits=0)))) } ################ # Args: # (1) table (numeric dataframe) Count table # (2) map (dataframe) Design # (3) effect (character) comma-delimited list of random effects to # suppress. Elements must be names of columns in mapping file # (4) verbose (character) Print messages? see PrintMsg() options # Returns: # A adjusted version of 'table' (a numeric dataframe) # Prints to stdout: # A description of the function ################ SuppressBatchEffect <- function(table=NULL, map=NULL, effect=NULL, fun='combat', verbose=TRUE) { method <- list(combat='ComBat', mean_centering='mean centering') PrintMsg(paste('"description":"Batch effect suppression on ', effect, ' using', method[[fun]], '"'), verbose) effect <- unlist(strsplit(effect, split=',')) if (fun == 'mean_centering') { table <- t(table) for (i in 1:length(effect)) { fact <- map[effect[i]] fact.name <- as.vector(unique(unlist(fact))) nfact <- length(fact.name) for (j in 1:ncol(table)) { data <- data.frame(d=table[, j], fact=fact) means <- NULL for (k in 1:nfact) { means <- append(means, mean(data[fact == fact.name[k], 'd'])) } for (k in 1:nfact) { adj <- max(means)-means[k] data[fact == fact.name[k], 'd'] <- data[fact == fact.name[k], 'd']+adj } table[, j] <- data$d } } table <- t(table) } else if (fun == 'combat') { suppressMessages(library('sva')) table <- ComBat(table, map[, effect]) } return (table) } ################ # Filter mapping file to keep only terms in model # Args: # (1) map (dataframe) # (2) model (formula vector) # Returns: # A filtered map ################ FilterMap <- function(map=NULL, model=NULL) { # keep only columns of map that are used in the model if (!is.null(model)) { term <- list() for (i in 1:length(model)) { term[[i]] <-as.vector(unique(unlist(strsplit(model[i], split=c('[^A-Za-z0-9_]'))))) term[[i]] <- term[[i]][! term[[i]] %in% c("", " ", "1", "Error")] } term <- as.vector(unlist(term)) row.names <- row.names(map) if (length(term) > 1) { map <- as.data.frame(map[, names(map) %in% term]) } else { map <- as.data.frame(map[, names(map) == term]) names(map) <- term } if (ncol(map) < length(unique(term))) { PrintMsg(paste('"error":"Some terms in model ', model, ' were not found in the map. 
Please make sure that all model terms refer to column headers in the map."', sep=''), verbose=TRUE, TRUE)
    }
  }
  return (map)
}
################
# Remove constant rows from table
# Args:
# (1) table (dataframe)
# (2) verbose (character) Print messages? see PrintMsg() options
# Returns:
# A filtered table
################
RemoveConstantRows <- function(table=NULL, verbose=TRUE) {
  filter <- rep(TRUE, nrow(table))
  for (i in 1:nrow(table)) {
    if (length(as.vector(unique(unlist(table[i, ])))) < 2) {
      filter[i] <- FALSE
      PrintMsg(paste('"warning":"Row ', i, ' (', row.names(table)[i], ') contained essentially constant data and was removed."', sep=''), verbose=verbose)
    }
  }
  return (table[filter == TRUE, ])
}
################################################################################
################################################################################
# ANALYSIS #####################################################################
# ANALYSIS #####################################################################
# ANALYSIS #####################################################################
# ANALYSIS #####################################################################
################################################################################
################################################################################
################
# Apply a clustering algorithm
# Args:
# (1) table (numeric dataframe) Count table
# (2) fun (character) Clustering function (algorithm)
# (3) json (character) Json string specifying packages
# (4) verbose (character) Print messages? see PrintMsg() options
# (5) graphical (logical) Generate graphics?
# Returns:
# A table of clusters
# Output to device:
# Depending on 'fun', a figure
# Prints to stdout:
# A description of the function
################
PerformClustering <- function(table=NULL, fun=c('kmeans', 'pam', 'pam-bray', 'pam-jaccard'), json=libJson, verbose=TRUE, graphical=TRUE) {
  if (ncol(table) < 10 || nrow(table) < 3) {
    PrintMsg('"warning":"Not enough data to perform clustering (min 10 columns and 3 rows)."', verbose)
    return (NULL)
  } else if (ncol(table) > 65536) {
    PrintMsg('"warning":"Sorry, the dataset is too large for clustering algorithms (max 65536 columns)."', verbose)
    return (NULL)
  }
  spec <- as.data.frame(do.call('rbind', json$clustering))
  spec <- spec[spec$value == fun, ]
  package <- ''
  pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=','))
  if (length(pkg) > 0) {
    for (i in 1:length(pkg)) {
      suppressMessages(library(pkg[i], character.only=TRUE))
    }
    package <- paste(' (R package {', unlist(spec$pkg), '})', sep='')
  }
  PrintMsg(paste('"description":"Clusters were determined using the ', spec$label, package, '."', sep=''), verbose)
  table <- t(table)
  if (fun == 'kmeans') {
    if (graphical == TRUE) {
      kmean.out <- kmeansruns(table, criterion='asw', plot=F)$cluster # figure margins too large issue...
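      # The kmeansruns() call above comes from the fpc package (assumed here to be
      # the package supplied by the json 'clustering' spec loaded earlier in this
      # function). criterion='asw' picks the number of clusters that maximizes the
      # average silhouette width; plot=F suppresses the per-run plots that can
      # trigger the "figure margins too large" device error noted above.
      # A minimal sketch with hypothetical data (not executed here):
      #   m <- matrix(rnorm(60), ncol=6)  # 10 samples x 6 features
      #   fpc::kmeansruns(m, krange=2:5, criterion='asw', plot=FALSE)$cluster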
} else { kmean.out <- kmeansruns(table, criterion='asw')$cluster } cluster <- paste('kmean', kmean.out, sep ='') } else if (fun == 'pam') { pam.out <- pamk(table, usepam=TRUE) if (graphical == TRUE) { plot(pam.out$pamobject, which.plots=2) } cluster <- paste('pam', pam.out$pamobject$cluster, sep ='') } else if (fun == 'pam-bray') { dist.data <- ComputeDistance(table=table, fun='bray', json=json, verbose=verbose) pam.out <- pamk(dist.data, usepam=TRUE) if (graphical == TRUE) { plot(pam.out$pamobject, which.plots=2) } cluster <- paste('pam', pam.out$pamobject$cluster, sep ='') } else if (fun == 'pam-jaccard') { dist.data <- ComputeDistance(table=table, fun='jaccard', json=json, verbose=verbose) pam.out <- pamk(dist.data, usepam=TRUE) if (graphical == TRUE) { plot(pam.out$pamobject, which.plots=2) } cluster <- paste('pam', pam.out$pamobject$cluster, sep ='') } else { return(NULL) } cluster <- as.data.frame(cluster) row.names(cluster) <- row.names(table) names(cluster) <- 'Clustering' PrintMsg(paste('"description":"Clusters: ', paste(paste(row.names(cluster), unlist(cluster), sep=':'), collapse=', '), '."', sep=''), verbose) return (cluster) } ################ # Build a relative abundance stacked barplot # Args: # (1) table (numeric dataframe) Count table # (2) threshold (numeric) Abundance cutoff for display in percent [0;100] # (3) verbose (character) Print messages? see PrintMsg() options # (4) graphical (logical) Generate graphics? # Returns: # Figure data as a json string # Output to device: # A figure # Prints to stdout: # A description of the function ################ AnalyseProportions <- function(table=NULL, map=NULL, verbose=TRUE, graphical=TRUE) { # Convert to relative abundances per column PrintMsg('"description":"Data was converted to proportions (%) per sample."', verbose) if (min(table) < 0) { PrintMsg('"warning":"Negative values were found! Negative values are not supported by this analysis."', verbose) table <- as.data.frame(abs(table)) } table <- as.data.frame(apply(table, 2, Numeric2Percent)) # Order by decreasing values table <- as.data.frame(table[order(rowMeans(table), decreasing=TRUE), ]) # Build stacked barchart if (as.logical(graphical) == TRUE) { col <- rainbow(nrow(table)) par(mar=c(4, 4, 2, 4), xpd=TRUE) barplot(as.matrix(table), ylim=c(0, max(colSums(table))), space=0, border=NA, names.arg=names(table), ylab='%', col=col, las=2, bty="n", cex.names=1.84906*ncol(table)^-0.2898) # Build legend in a separate figure plot.new() legend("topleft", row.names(table), cex=0.8, fill=col, horiz=FALSE) } return (Dataframe2DataJson(data=table, xmetadata=map)) } ################ # Calculate diversity per column # Args: # (1) table (numeric dataframe) Count table # (2) fun (character) The diversity function to use # (3) json (character) Json string specifying packages # (4) verbose (character) Print messages? 
see PrintMsg() options # Returns: # A vector of length=ncol(table) # Prints to stdout: # A description of the function ################ ComputeDiversity <- function(table=NULL, fun='shannon', json=libJson, verbose=TRUE) { spec <- as.data.frame(do.call('rbind', json$diversity)) spec <- spec[spec$value == fun, ] package <- '' pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=',')) if (length(pkg) > 0) { for (i in 1:length(pkg)) { suppressMessages(library(pkg[i], character.only=TRUE)) } package <- paste(' (R package {', unlist(spec$pkg), '})', sep='') } else { pkg <- '' } PrintMsg(paste('"description":"The ', spec$label, package, ' was calculated for each sample."', sep=''), verbose) diversity <- NULL if (pkg == 'vegan') { for (i in 1:ncol(table)) { diversity <- append(diversity, diversity(table[, i], index=fun, MARGIN=0, base=2), after=length(diversity)) } } else if (pkg == 'ineq') { for (i in 1:ncol(table)) { diversity <- append(diversity, ineq(table[, i], parameter=NULL, type=fun, na.rm=TRUE), after=length(diversity)) } } else if (fun == 'richness') { for (i in 1:ncol(table)) { diversity <- append(diversity, sum(table[, i] != 0), after=length(diversity)) } } else if (fun == 'chao1') { diversity <- chao1(table, taxa.row = TRUE) } else { PrintMsg(paste('"error":"Unknown argument to fun (2):', fun, '"', sep=' '), verbose, TRUE) } return (diversity) } ################ # Compute correlations # Args: # (1) table (numeric dataframe) Count table # (2) fun (character) The correlation methode to use # (3) json (character) Json string specifying packages # (4) verbose (character) Print messages? see PrintMsg() options # Returns: # A vector of length=ncol(table) # Prints to stdout: # A description of the function ################ ComputeCorrelation <- function(table=NULL, fun='pearson', test=FALSE, json=libJson, verbose=TRUE) { spec <- as.data.frame(do.call('rbind', json$correlation)) spec <- spec[spec$value == fun, ] package <- '' pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=',')) if (length(pkg) > 0) { for (i in 1:length(pkg)) { suppressMessages(library(pkg[i], character.only=TRUE)) } package <- paste(' (R package {', unlist(spec$pkg), '})', sep='') } PrintMsg(paste('"description":"Calculated ', spec$label, package, '."', sep=''), verbose=verbose) if (as.logical(test) == TRUE) { cor.output <- list() cor.output[['estimate']] <- matrix(ncol=nrow(table), nrow=nrow(table)) cor.output[['p.value']] <- matrix(ncol=nrow(table), nrow=nrow(table)) for (i in 2:nrow(table)) { for (j in 1:(i-1)) { out <- cor.test(unlist(table[i, ]), unlist(table[j, ]), method=fun) cor.output[['estimate']][i, j] <- out$estimate cor.output[['p.value']][i, j] <- out$p.value } } cor.output[['estimate']][upper.tri(cor.output[['estimate']])] <- t(cor.output[['estimate']])[upper.tri(cor.output[['estimate']])] diag(cor.output[['estimate']]) <- 1 cor.output[['p.value']][upper.tri(cor.output[['p.value']])] <- t(cor.output[['p.value']])[upper.tri(cor.output[['p.value']])] diag(cor.output[['p.value']]) <- 0 return (cor.output) } if (fun == 'pearson') { return (cor(t(table), method=fun, use='pairwise.complete.obs')) } else { return (cor(t(table), method=fun, use='na.or.complete')) } } ################ # Compute distances # Args: # (1) table (numeric dataframe) Count table # (2) fun (character) The distance function to use # (3) json (character) Json string specifying packages # (4) verbose (character) Print messages? 
see PrintMsg() options # Returns: # A vector of length=ncol(table) # Prints to stdout: # A description of the function ################ ComputeDistance <- function(table=NULL, fun='bray', json=libJson, verbose=TRUE) { spec <- as.data.frame(do.call('rbind', json$distance)) spec <- spec[spec$value == fun, ] package <- '' pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=',')) if (length(pkg) > 0) { for (i in 1:length(pkg)) { suppressMessages(library(pkg[i], character.only=TRUE)) } package <- paste(' (R package {', unlist(spec$pkg), '})', sep='') } if (fun %in% c('pearson', 'spearman')) { PrintMsg(paste('"description":"A distance matrix was calculated based on ', spec$label, package, '."', sep=''), verbose=verbose) return (as.dist(as.matrix(ComputeCorrelation(table, fun=fun, json=json, verbose=FALSE)))) } else { # Correct for negative entries by adding a scalar (Caillez correction method) if (min(table) < 0) { table <- table+abs(min(table)) PrintMsg(paste('"description":"A Caillez correction was applied and a distance matrix was calculated based on ', spec$label, package, '."', sep=''), verbose=verbose) } else { PrintMsg(paste('"description":"A distance matrix was calculated based on ', spec$label, package, '."', sep=''), verbose=verbose) } if ('vegan' %in% pkg) { return (vegdist(table, method=fun, binary=FALSE, diag=TRUE, upper=TRUE)) } } } ################ # Test if model is balanced # Args: # (1) map (data-frame) Experimental design table # (2) model (character) A formula representing the model for stats. Terms # must refer to names(map) # (3) verbose (character) Print messages? see PrintMsg() options # Returns: # A bool indicating whether the design is balanced or not ################ ModelIsBalanced <- function(map=NULL, model=NULL, verbose='TRUE') { # keep only columns of map that are used in the model map <- FilterMap(map=map, model=model) for (i in 1:ncol(map)) { fact <- as.vector(unique(unlist(map[, i]))) n <- length(map[map[, i] == fact[1], i]) if (length(fact) > 1) { for (j in 2:length(fact)) { if (length(map[map[, i] == fact[j], i]) != n) { PrintMsg('log', paste('Model ',model , ' is not balanced.', sep=''), verbose=verbose) return (FALSE) } } } } PrintMsg('note', paste('Model ',model , ' is balanced.', sep=''), verbose=verbose) return (TRUE) } ################ # Test if model's variables could be nested # Args: # (1) map (data-frame) Experimental design table # (2) model (character) A formula representing the model for stats. Terms # must refer to names(map) # (3) verbose (character) Print messages? 
see PrintMsg() options # Returns: # A list of nested variables ################ ModelIsNested <- function(map=NULL, model=NULL, verbose='TRUE') { # keep only columns of map that are used in the model map <- FilterMap(map=map, model=model) if (ncol(map) < 2) { return (NULL) } out <- list() out.txt <- NULL for (i in 1:ncol(map)) { fact <- as.vector(unique(unlist(map[, i]))) test <- as.data.frame(map[, -i]) names(test) <- names(map)[-i] out[[names(map)[i]]] <- NULL for (j in 1:ncol(test)) { group1 <- as.vector(test[map[, i] == fact[1], j]) nested <- TRUE if (length(as.vector(unique(map[, i]))) >= length(as.vector(unique(test[, j])))) { nested <- FALSE } else if (length(as.vector(unique(group1))) != length(group1)) { nested <- FALSE } else { for (k in 2:length(fact)) { if (!identical(group1, as.vector(test[map[, i] == fact[k], j]))) { nested <- FALSE break } } } if (nested == TRUE) { out[[names(map)[i]]] <- append(out[[names(map)[i]]], names(test)[j], after=length(out[[names(map)[i]]])) out.txt <- append(out.txt, paste(names(test)[i], names(map)[j], sep='/'), after=length(out.txt)) } } } if (length(out) == 0) { return (NULL) } PrintMsg('log', paste('The following nesting was detected in model ', model, ': ', paste(out.txt, collapse=', '), ' Please control that the model is the one you want.', sep=''), verbose=verbose) return (out) } ################ # Compute statistics # Args: # (1) response (numeric vector) # (2) map (data-frame) Experimental design table with # nrow=length(response) # (3) method (character) Statistics to use # (4) model (character) A formula representing the model for stats. Terms # must refer to names(map) # (5) pairwise (bool) Perform pairwise comparision? # (6) verbose (character) Print messages? see PrintMsg() options # Returns: # A data-frame # Prints to stdout: # A description of the function ################ ComputeStats <- function(response=NULL, map=NULL, method='anova', model=NULL, pairwise=TRUE, verbose=TRUE) { if (is.null(method)) { method <- 'anova' PrintMsg('"warning":"The statistical method was not specified. An ANOVA will be performed by default."', verbose) } # If no model is specified, assume all columns in map are fixed variables if (is.null(model)) { model <- paste(names(map), collapse='+') PrintMsg(paste('"warning":"No model was specified. Based on the map, the following model will be used by default: ', model, '."', sep=''), verbose) } # Test that each factor has >= 2 levels term <- as.vector(unique(unlist(strsplit(model, split=c('[^A-Za-z0-9_]'))))) term <- term[! term %in% c("", " ", "1", "Error")] for (i in 1:length(term)) { if (!term[i] %in% names(map)) { PrintMsg(paste('"error":"The following model factor was not found in map: ', term[i], '. Please correct the model or correct the map."', sep=''), verbose=TRUE, stop=TRUE) } levels <- as.vector(unique(unlist(map[, term[i]]))) if (length(levels) < 2) { PrintMsg(paste('"error":"The following model factor has less than 2 levels: ', term[i], '. Please correct the model or correct the map."', sep=''), verbose=TRUE, stop=TRUE) } for (j in 1:length(levels)) { if (length(map[map[, term[i]] == levels[j], ]) == 1) { PrintMsg(paste('"warning":"The following group contains only one element (variance is null): ', levels[j], '. 
This will impact on statistics."', sep=''), TRUE) } } } response.name <- 'response' while (response.name %in% names(map)) { response.name <- paste('response', paste(sample(c(letters, LETTERS, c(0:9)), 4), collapse=''), sep='') } names(response) <- response.name formula <- paste(response.name, ' ~ ', model, sep='') # Build the dataframe data <- map data <- cbind(map, response) pairwise.output <- NULL if (method == 'anova' || is.null(method)) { # PARAMETRIC, ACCEPTS MODELS WITH >2 FACTORS, MULTIPLE FIXED VARIABLES AND NESTING # MODEL CAN BE OF THE FORM "a*b+c" PrintMsg(paste('"description":"An analysis of variance (ANOVA) was performed using the model: ', model, '."', sep=''), verbose) aov.output <- aov(formula=as.formula(formula), data=data) if (pairwise == TRUE) { pairwise.output <- TukeyHSD(aov.output) } summary <- as.data.frame(summary(aov.output)[[1]]) if ('Pr(>F)' %in% names(summary)) { p.value <- summary[, 'Pr(>F)'] } else { PrintMsg(paste('"warning":"Impossible to calculate a p-value. Please make sure that the model ', model, ' is not saturated."', sep=''), verbose) p.value <- rep('NA', nrow(summary)) } summary <- data.frame(row.names=gsub('[[:space:]]', '', row.names(summary)), p.value=p.value) #explained=summary[, 'Sum Sq']/sum(summary[, 'Sum Sq'])) } else if (method == 'friedman') { # NON-PARAMETRIC, ACCEPTS MODELS WITH >2 FACTORS BUT REQUIRES EXACTLY 2 NESTED VARIABLE # MODEL HAS TO BE OF THE FORM "a | b" friedman.terms <- unlist(strsplit(model, split=' ')) if (length(friedman.terms) != 3 || friedman.terms[2] != '|') { PrintMsg(paste('"error":"The model must be of the form a | b when using Friedman test. Please provide an appropriate model. The current model is: ', model, '."', sep=''), verbose, TRUE) } friedman.output <- friedman.test.with.post.hoc(formula=as.formula(formula), data=data) PrintMsg(paste('"description":"A Friedman test was performed using the model: ', model, '."', sep=''), verbose) if (pairwise == TRUE) { pairwise.output <- list() pairwise.output[[term[1]]] <- data.frame(row.names=paste(unique(unlist(map[, term[1]])), collapse='-'), p=friedman.output$PostHoc.Test) PrintMsg('"description":"A post-hoc test with correction for multiple comparisions was performed."', verbose) } summary <- data.frame(row.names=term[1], p.value=friedman.output$Friedman.Test) } else if (method == 'kruskal') { # NON-PARAMETRIC, ACCEPTS MODELS WITH >2 FACTORS BUT ONLY 1 FIXED VARIABLE # MODEL CAN BE OF THE FORM "a" if (length(term) > 1) { PrintMsg(paste('"error":"The model cannot include more than 1 variable when using Kruskal-Wallis rank sum test. Please provide an appropriate model. 
The current model is: ', model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE) } suppressMessages(library('dunn.test')) dunn.output <- dunn.test(data[, response.name], g=map[, term[1]], kw=FALSE, table=FALSE, list=FALSE) PrintMsg(paste('"description":"A Kruskal-Wallis rank sum test was performed using the model: ', model, '."', sep=''), verbose) if (pairwise == TRUE) { pairwise.output <- list() pairwise.output[[term[1]]] <- data.frame(row.names=paste(unique(unlist(map[, term[1]])), collapse='-'), p=dunn.output$P) PrintMsg('"description":"A post-hoc test with correction for multiple comparisions (R library {dunn.test}) was performed."', verbose) } summary <- data.frame(row.names=term[1], p.value=pchisq(dunn.output$chi2, nrow(data)-1, lower.tail=FALSE)) } else if (method == 'wilcox') { # NON-PARAMETRIC, ACCEPTS MODELS WITH 2 FACTORS ONLY AND ONE FIXED VARIABLES # MODEL HAS TO BE OF THE FORM "a" if (length(term) > 1) { PrintMsg(paste('"error":"The model cannot include more than 1 variable when using Wilcoxon rank sum test. Please provide an appropriate model. The current model is: ', model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE) } - samples <- unique(unlist(data[, model])) + samples <- as.vector(unlist(unique(unlist(data[, model])))) if (length(samples) != 2) { PrintMsg(paste('"error":"The Wilcoxon rank sum test can only perform two samples comparison. Please provide an appropriate model. The current model includes the following samples: ', sample, '."', sep=''), verbose, TRUE) } wilcox.output <- wilcox.test(formula=as.formula(formula), data=data) PrintMsg(paste('"description":"A Wilcoxon rank sum test was performed using the model: ', model, '."', sep=''), verbose) summary <- data.frame(row.names=paste(samples, collapse='-'), p.value=wilcox.output$p.value) pairwise.output <- list() pairwise.output[[model]] <- as.data.frame(wilcox.output$p.value) names(pairwise.output[[model]]) <- model row.names(pairwise.output[[model]]) <-paste(samples, collapse='-') } else if (method == 'wilcox_paired') { # NON-PARAMETRIC, ACCEPTS MODELS WITH 2 FACTORS ONLY AND ONE FIXED VARIABLES. NESTING IS ASSUMED FROM THE ORDER OF THE DATA! # MODEL HAS TO BE OF THE FORM "a" if (length(term) > 1) { PrintMsg(paste('"error":"The model cannot include more than 1 variable when using Wilcoxon signed rank test. Please provide an appropriate model. The current model is: ', model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE) } samples <- unique(unlist(data[, model])) if (length(samples) != 2) { PrintMsg(paste('"error":"The Wilcoxon signed rank test can only perform two samples paired comparison. Please provide an appropriate model. the current model includes the following samples: ', - paste(sample, collapse=', '), + paste(samples, collapse=', '), '."', sep=''), verbose, TRUE) } x <- data[data[, model] == samples[1], model] y <- data[data[, model] == samples[2], model] if (length(x) != length(y)) { PrintMsg(paste('"error":"A paired comparison in Wilcoxon signed rank test is possible only if samples have same size. Please provide an appropriate model, correct the map or use different statistics. Current samples are: ', samples[1], '(n=', length(x), '), ', samples[2], '(n=', length(y), ')."', sep=''), verbose, TRUE) } wilcox.output <- wilcox.test(formula=as.formula(formula), data=data, paired=TRUE) PrintMsg(paste('"description":"A Wilcoxon signed rank test was performed using the model: ', model, '. 
Paires: ', paste(paste(row.names(data)[data[, model] == samples[1]], row.names(data)[data[, model] == samples[2]], sep='-'), collapse=', '), '."', sep=''), verbose) summary <- data.frame(row.names=paste(samples, collapse='-'), p.value=wilcox.output$p.value) pairwise.output <- list() pairwise.output[[model]] <- as.data.frame(wilcox.output$p.value) names(pairwise.output[[model]]) <- model row.names(pairwise.output[[model]]) <-paste(samples, collapse='-') } else if (method == 'ttest') { # PARAMETRIC, ACCEPTS MODELS WITH 2 FACTORS ONLY AND ONE FIXED VARIABLES. # MODEL HAS TO BE OF THE FORM "a" if (length(term) > 1) { PrintMsg(paste('"error":"The model cannot include more than 1 variable when using t-test. Please provide an appropriate model. The current model is: ', model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE) } samples <- unique(unlist(data[, model])) if (length(samples) != 2) { PrintMsg(paste('"error":"A t-test can only perform two samples comparison. Please provide an appropriate model. The current model includes the following samples: ', - paste(sample, collapse=', '), + paste(samples, collapse=', '), '."', sep=''), verbose, TRUE) } ttest.output <- t.test(formula=as.formula(formula), data=data) PrintMsg(paste('"description":"A paired t-test was performed using the model: ', model, '."', sep=''), verbose) summary <- data.frame(row.names=paste(samples, collapse='-'), p.value=ttest.output$p.value) pairwise.output <- list() pairwise.output[[model]] <- as.data.frame(ttest.output$p.value) names(pairwise.output[[model]]) <- model row.names(pairwise.output[[model]]) <-paste(samples, collapse='-') } else if (method == 'ttest_paired') { # PARAMETRIC, ACCEPTS MODELS WITH 2 FACTORS ONLY AND ONE FIXED VARIABLES. NESTING IS ASSUMED FROM THE ORDER OF THE DATA! # MODEL HAS TO BE OF THE FORM "a" if (length(term) > 1) { PrintMsg(paste('"error":"A model cannot include more than 1 variable when using t-test. Please provide an appropriate model. The current model is: ', model, ' (', length(term), ' variables)."', sep=''), verbose, TRUE) } samples <- unique(unlist(data[, model])) if (length(samples) != 2) { PrintMsg(paste('"error":"A paired t-test can only perform two samples comparison. Please provide an appropriate model. The current model includes the following samples: ', - paste(sample, collapse=', '), + paste(samples, collapse=', '), '."', sep=''), verbose, TRUE) } x <- data[data[, model] == samples[1], model] y <- data[data[, model] == samples[2], model] if (length(x) != length(y)) { PrintMsg(paste('"error":"A paired comparison in t-test is possible only if samples have same size. Please provide an appropriate model, correct the map or use different statistics. The current samples are: ', samples[1], '(n=', length(x), '), ', samples[2], '(n=', length(y), ')."', sep=''), verbose, TRUE) } ttest.output <- t.test(formula=as.formula(formula), data=data, paired=TRUE) PrintMsg(paste('"description":"A paired t-test was performed using the model: ', model, '. 
Paires: ', paste(paste(row.names(data)[data[, model] == samples[1]], row.names(data)[data[, model] == samples[2]], sep='-'), collapse=', '), '."', sep=''), verbose) summary <- data.frame(row.names=paste(samples, collapse='-'), p.value=ttest.output$p.value) pairwise.output <- list() pairwise.output[[model]] <- as.data.frame(ttest.output$p.value) names(pairwise.output[[model]]) <- model row.names(pairwise.output[[model]]) <-paste(samples, collapse='-') } stat <- list(summary=summary, pairwise=pairwise.output) return (stat) } ################ # Plot rarefaction curves # Args: # (1) data (numeric matrix) rarefaction table # Output to device: # A figure ################ PlotRarefactionCurves <- function(data=NULL, xlab='Rarefaction depth', ylab='') { color.palette <- rainbow(ncol(data)) plot(x=row.names(data), y=c(1:nrow(data)), type="n", ylim=c(min(data), max(data)), xlab=xlab, ylab=ylab) for (i in 1:ncol(data)) { lines(x=row.names(data), y=data[, i], col=color.palette[i]) } return () } ################ # Perform a diversity analysis # Args: # (1) table (numeric dataframe) Count table # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) fun (character) The diversity function to use # (4) model (formula vector) A formula representing the model for stats. Terms # must refer to names(map) # (5) json (character) Json string specifying packages # (6) verbose (character) Print messages? see PrintMsg() options # (7) graphical (logical) Generate graphics? # Returns: # Figure data as a json string # Output to device: # A figure # Prints to stdout: # A description of the function ################ AnalyseDiversity <- function(table=NULL, map=NULL, fun=c('richness', 'shannon'), nrar=20, compare_diversity=FALSE, stats=NULL, model=NULL, json=libJson, verbose=TRUE, graphical=TRUE) { if (is.null(fun)) { fun <- 'richness' } suppressMessages(library('vegan')) fun <- fun[fun != ''] # Round counts if not integers if (length(table[table < 0]) > 0) { PrintMsg('"error":"Rarefaction is not applicable because negative values were detected."', verbose, TRUE) return () } if (length(table[table%%1 > 0]) > 0) { PrintMsg('"warning":"Non-integer values were detected. 
In order to permit rarefaction, these values were rounded to the nearest integer."', verbose) table <- round(table, digits=0) } # Compute rarefaction curves for diversity colsums <- colSums(table) min.colsum <- floor(min(colsums)) ncol <- ncol(table) sample <- matrix(rep(floor(seq(from=0, to=min.colsum, length.out=nrar)), ncol), ncol=ncol, byrow=FALSE) diversity <- list() map <- FilterMap(map=map, model=model) rarefied.table <- list() for (i in 1:nrar) { rarefied.table[[i]] <- rrarefy(t(table), sample=apply(rbind(sample[i, ], colsums), 2, min)) } PrintMsg('"description":"This is a diversity analysis."', verbose) PrintMsg(paste('"description":"The dataset was rarefied until ', min.colsum, ' counts per sample."', sep=''), verbose) data.json <- c(1:length(fun)) for (i in 1:length(fun)) { div <- matrix(ncol=ncol, nrow=0) v <- TRUE for (j in 1:nrar) { div <- rbind(div, ComputeDiversity(table=t(rarefied.table[[j]]), fun=fun[i], json=json, verbose=v)) v <- FALSE } colnames(div) <- colnames(table) row.names(div) <- sample[, 1] diversity[[i]] <- as.matrix(div) # Build figures if (graphical == TRUE) { PlotRarefactionCurves(div, ylab=fun[i]) } # Calculate statistics if (compare_diversity == TRUE) { p.value <- list() for (j in 1:length(model)) { if (length(stats) == length(model)) { method <- stats[j] } else { method <- stats[1] } stat <- ComputeStats(response=div[nrar, ], map=map, method=method, model=model[j], pairwise=FALSE, verbose=TRUE)$summary p <- as.data.frame(t(as.matrix(stat$p.value))) names(p) <- row.names(stat) p <- as.data.frame(p[, names(p) != 'Residuals']) # the following is usefull in case model contains only one element names(p) <- row.names(stat)[row.names(stat) != 'Residuals'] row.names(p) <- fun[i] p.value[[j]] <- p } PrintMsg(paste('"description":"The statistics were performed at a rarefaction depth of ', min.colsum, ' counts per sample."', sep=''), verbose) } else { p.value <- NULL } data.json[i] <- paste('"', fun[i], '":', diversity2json(diversity[[i]], p.value, map), sep='') } return (paste('{', paste(data.json, collapse=','), '}', sep='')) } ################ # Permutational Multivariate Analysis of Variance Using Distance Matrices # Args: # (1) model (character) # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) fun (character) The distance function to use # (4) graphical (logical) Generate graphics? 
# Returns: # A summary (data-frame) # Prints to stdout: # A description of the function ################ Adonis <- function(table=NULL, model=NULL, strata=NULL, map=NULL, fun='bray', graphical=TRUE) { if (!is.null(model)) { formula <- paste('table ~ ', model, sep='') } else { formula <- paste('table ~ ', paste(names(map), collapse='*'), sep='')} if (!is.null(strata)) { if (strata == '') { strata == NULL } else { #formula <- paste(formula, strata, sep=' / ') strata <- map[, strata] } } adonis.output <- adonis(formula=as.formula(formula), data=as.data.frame(map), strata=strata, method=fun, permutations=1000) summary <- data.frame(row.names=row.names(adonis.output$aov.tab)) summary <- data.frame(row.names=gsub('[[:space:]]', '', row.names(adonis.output$aov.tab)), p.value=adonis.output$aov.tab[, 'Pr(>F)'], explained=adonis.output$aov.tab[, 'R2']) summary <- summary[row.names(summary) != 'Total', ] # Build the figure if (graphical == TRUE) { pie(summary$explained, labels=row.names(summary), col=rainbow(length(summary$explained)), init.angle=0, radius=0.7, cex=0.7, xpd=TRUE) legend(x=-1, y=-1, xjust=1, yjust=0.5, title='p-value', legend=round(summary$p.value, digits=4), fill=rainbow(nrow(summary)), cex=0.7, xpd=TRUE) } return (summary) } ################ # Permutational Multivariate Analysis of Variance Using Distance Matrices # Args: # (1) table (numeric dataframe) Count table # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) fun (character) The distance function to use # (4) model (character vector) A formula representing the model for stats. Terms # must refer to names(map) # must refer to names(map) # (5) verbose (character) Print messages? see PrintMsg() options # (6) graphical (logical) Generate graphics? # Returns: # Figure data as a json string # Output to device: # A figure # Prints to stdout: # A description of the function ################ PerformAdonis <- function(table=NULL, map=NULL, fun='bray', model=NULL, strata=NULL, json=libJson, verbose=TRUE, graphical=TRUE) { suppressMessages(library('vegan')) #out.json <- c(1:length(fun)) #for (k in 1:length(fun)) { spec <- as.data.frame(do.call('rbind', json$distance)) spec <- spec[spec$value == fun, ] package <- '' pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=',')) if (length(pkg) > 0) { package <- paste(' (R package {', unlist(spec$pkg), '})', sep='') } PrintMsg(paste('"description":"This is a permutational multivariate analysis of variance using the Adonis method (R package {vegan}) based on the ', spec$label, package, '."', sep=''), verbose) # Correct for negative entries by adding a scalar (Caillez correction method) if (min(table) < 0) { c <- abs(min(table)) table <- table+c PrintMsg(paste('"description":"Negative values were corrected using a Caillez correction (', c, ' added to all values)."', sep=''), verbose) } if (is.null(strata) || strata == '') { strata <- rep(NULL, length(model)) } if (!is.null(strata) && length(strata) != length(model)) { PrintMsg(paste('"warning":"Number of strata (', length(strata), ') differs from number of models(', length(model), '). 
No strata will be used."', sep=''), verbose) strata <- rep(NULL, length(model)) } # Perform perMANOVA #map <- FilterMap(map=map, model=model) data.json <- c(1:length(model)) for (i in 1:length(model)) { stat <- Adonis(table=t(table), model=model[i], strata=strata[i], map=map, fun=fun, graphical=graphical) stat.json <- c(1:nrow(stat)) for (j in 1:nrow(stat)) { stat.json[j] <- paste('{"name":"', row.names(stat)[j], '","p-value":"', stat$p.value[j], '","explained":"', stat$explained[j], '"}', sep='') } data.json[i] <- paste('"', model[i], '":[', paste(stat.json, collapse=','), ']', sep='') } #out.json[k] <- paste('"', # fun[k], # '":"{', # paste(data.json, collapse=','), # '}', # sep='') #} #return (paste('{',paste(out.json, collapse=','), '}', sep='')) return (paste('{',paste(data.json, collapse=','), '}', sep='')) } ################ # Principal Component Analysis (PCA) # Args: # (1) table (numeric dataframe) Count table # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) model (character vector) A formula representing the model for stats. Terms # must refer to names(map) # (4) col (character) Name of a column in map to use to define colors # (5) biplot (numeric) Show n=biplot factors with biggest norms on the # biplot # (6) verbose (character) Print messages? see PrintMsg() options # (7) graphical (logical) Generate graphics? # Returns: # Figure data as a json string # Output to device: # A figure # Prints to stdout: # A description of the function ################ PerformPCA <- function(table=NULL, map=NULL, biplot=TRUE, verbose=TRUE, graphical=TRUE) { suppressMessages(library('FactoMineR')) ncp <- min(max(2, ncol(table)), 5) pca.output <- PCA(t(table), ncp=ncp, scale.unit=FALSE, graph=FALSE) if (graphical == TRUE) { plot(pca.output, new.plot=FALSE) } # Pack data into a json string data.ind <- as.data.frame(t(as.data.frame(pca.output$ind$coord))) row.names(data.ind) <- gsub('Dim.', 'PC', row.names(data.ind)) data.json <- Dataframe2DataJson(data=data.ind, xmetadata=map) eig.json <- c(1:ncp) for (i in 1:ncp) { eig.json[i] <- paste('"PC', i, '":"', pca.output$eig[i, 2], '"', sep='') } if (biplot == TRUE) { data.var <- as.data.frame(t(as.data.frame(pca.output$var$coord))) row.names(data.var) <- gsub('Dim.', 'PC', row.names(data.var)) data.json <- paste('{"ind":', data.json, ',"var":', Dataframe2DataJson(data=data.var), ',"eig":{', paste(eig.json, collapse=','), '}}', sep='') } else { data.json <- paste('{"data":', data.json, ',"eig":{', paste(eig.json, collapse=','), '}}', sep='') } PrintMsg('"description":"This is a principal component analysis (PCA) (R package {FactoMineR})."', verbose) return (data.json) } ################ # Correspondence Analysis (CA) # Args: # (1) table (numeric dataframe) Count table # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) model (character vector) A formula representing the model for stats. Terms # must refer to names(map) # (4) col (character) Name of a column in map to use to define colors # (5) biplot (numeric) Show n=biplot factors with biggest norms on the # biplot # (6) verbose (character) Print messages? see PrintMsg() options # (7) graphical (logical) Generate graphics? 
# Returns: # Figure data as a json string # Output to device: # A figure # Prints to stdout: # A description of the function ################ PerformCA <- function(table=NULL, map=NULL, verbose=TRUE, graphical=TRUE) { suppressMessages(library('FactoMineR')) ncp <- min(max(2, ncol(table)), 5) ca.output <- CA(table, ncp=ncp, graph=FALSE) if (graphical == TRUE) { plot(ca.output, new.plot=FALSE) } # Pack data into a json string data.ind <- as.data.frame(t(as.data.frame(ca.output$col$coord))) data.json <- Dataframe2DataJson(data=data.ind, xmetadata=map) eig.json <- c(1:ncp) for (i in 1:ncp) { eig.json[i] <- paste('"Dim', i, '":"', ca.output$eig[i, 2], '"', sep='') } data.var <- t(as.data.frame(ca.output$row$coord)) data.json <- paste('{"ind":', data.json, ',"var":', Dataframe2DataJson(data=data.var), ',"eig":{', paste(eig.json, collapse=','), '}}', sep='') PrintMsg('"description":"This is a correspondence analysis (CA) (R package {FactoMineR})."', verbose) return (data.json) } ################ # Principal Coordinates Analysis (PCoA) # Args: # (1) table (numeric dataframe) Count table # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) fun (character) The distance function to use # (4) model (character vector) A formula representing the model for stats. Terms # must refer to names(map) # (5) col (character) Name of a column in map to use to define colors # (6) json (character) Json string specifying packages # (7) verbose (character) Print messages? see PrintMsg() options # (8) graphical (logical) Generate graphics? # Returns: # Figure data as a json string # Output to device: # A figure # Prints to stdout: # A description of the function ################ PerformPCoA <- function(table=NULL, map=NULL, fun='bray', json=libJson, verbose=TRUE, graphical=TRUE) { spec <- as.data.frame(do.call('rbind', json$distance)) spec <- spec[spec$value == fun, ] package <- '' pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=',')) if (length(pkg) > 0) { for (i in 1:length(pkg)) { suppressMessages(library(pkg[i], character.only=TRUE)) } package <- paste(' (R package {', unlist(spec$pkg), '})', sep='') } dist <- as.matrix(ComputeDistance(table=Transpose(table), fun=fun, json=json, verbose=FALSE)) row.names(dist) <- names(table) data.json <- PerformPCA(table=dist, map=map, biplot=FALSE, verbose=FALSE, graphical=graphical) PrintMsg(paste('"description":"This is a principal coordinate analysis (PCoA) (R package {FactoMineR}) based on ', spec$label, package, '."', sep=''), verbose) return (data.json) } ################ # Canonical Correlation Analysis (CCA) # Args: # (1) table (numeric dataframe) Count table # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) model (character vector) A formula representing the model for stats. Terms # must refer to names(map) # (4) col (character) Name of a column in map to use to define colors # (5) biplot (numeric) Show n=biplot factors with biggest norms on the # biplot # (6) verbose (character) Print messages? see PrintMsg() options # (7) graphical (logical) Generate graphics? 
# Returns: # Figure data as a json string # Output to device: # A figure # Prints to stdout: # A description of the function ################ PerformCCA <- function(table=NULL, map=NULL, column=NULL, verbose=TRUE, graphical=TRUE) { suppressMessages(library('vegan')) table <- as.data.frame(t(table)) names <- unlist(strsplit(column, split=',')) categs <- as.data.frame(map[, names]) names(categs) <- names row.names(categs) <- row.names(map) cca.output <- cca(table ~ ., data=categs) if (graphical == TRUE) { plot(cca.output) } # Pack data into a json string data.ind <- as.data.frame(t(cca.output$CCA$u)) data.var <- as.data.frame(t(cca.output$CCA$v)) data.biplot <- as.data.frame(t(cca.output$CCA$biplot)) data.json <- paste('{"ind":', Dataframe2DataJson(data=data.ind, xmetadata=map), ',"var":', Dataframe2DataJson(data=data.var), ',"biplot":', Dataframe2DataJson(data=data.biplot), '}', sep='') PrintMsg(paste('"description":"This is a canonical correspondence analysis (CCA) (R package {vegan}) with the following constraining variable(s):', column, '."', sep=''), verbose) return (data.json) } ################ # Compute fold-change # Args: # (1) response (numeric vector) Data # (2) factor (vector) Experimental design vector with # nrow=length(response) # (3) by (dataframe) A nested variable to use for calculating # fold-changes # (4) balanced (bool) assume that model balanced? If NULL, balance will be tested # (5) verbose (character) Print messages? see PrintMsg() options # Returns: # A data-frame containing fold-changes for each variable in model # Prints to stdout: # A description of the function ################ ComputeFoldChange <- function(group1=NULL, group2=NULL, names=NULL, verbose=TRUE) { out <- list() out[['logfc']] <- log2(mean(group2))-log2(mean(group1)) out[['ab']] <- mean(c(group2, group1)) PrintMsg(paste('"description":"Fold-change is calculated as log2(mean(', names[2], '))-log2(mean(', names[1], ')) and abundance is calculated as (mean(', names[1], ')+mean(', names[2], '))/2."', sep=''), verbose) return(out) } ################ # Extract group of samples from map # Args: # (1) map (data-frame) Experimental design table with nrow=ncol(table) # Returns: # Prints to stdout: ################ ExtractGroupFromMap <- function(map=NULL, column=NULL, group=NULL, verbose) { map <- as.data.frame(FilterMap(map=map, model=column)) group <- unlist(strsplit(group, split=':')) if (ncol(map) != length(column)) { PrintMsg('"error":"Unable to determine groups for fold-change analysis. Make sure no - or : character is present in the mapping file."' , verbose) } keep <- 1 for (i in 1:ncol(map)) { keep <- keep*(map[, i] == group[i]) } return(keep) } ################ # Analyse changes (fold-changes, p-values etc.) # Args: # (1) table (numeric dataframe) Count table # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) model (character vector) A formula representing the model for stats. Terms # must refer to names(map) # (4) json (character) Json string specifying packages # (5) verbose (character) Print messages? 
see PrintMsg() options # Returns: # Figure data as a json string # Prints to stdout: # A description of the function ################ AnalyseChange <- function(table=NULL, map=NULL, stats=NULL, model=NULL, json=libJson, verbose=TRUE, graphical=FALSE) { PrintMsg('"description":"This is a differential analysis (fold-changes and statistical tests)."', verbose) # Calculate statistics if (is.null(model)) { model <- paste(names(map), sep='+') } nmodel <- length(model) data.json <- c(1:nmodel) for (i in 1:nmodel) { if (length(stats) != length(model)) { method = stats[1] } else { method = stats[i] } nrow <- nrow(table) effect.json <- list() v <- verbose graphic.data <- list() for (j in 1:nrow) { stat <- ComputeStats(response=as.vector(unlist(table[j, ])), map=map, method=method, model=model[i], pairwise=TRUE, verbose=v)$pairwise effect.names <- names(stat) neffect <- length(effect.names) for (k in 1:neffect) { if (j == 1 && k == 1) { graphic.data[[effect.names[k]]] <- list() } p <- unlist(stat[[k]][, ncol(stat[[k]])]) if (length(effect.json) == 0) { effect.json[[effect.names[k]]] <- list() } comp.names <- row.names(stat[[k]]) ncomp <- length(comp.names) for (l in 1:ncomp) { if (j == 1 && l == 1) { graphic.data[[effect.names[k]]][[comp.names[l]]] <- data.frame(logfc=c(1:nrow), ab=c(1:nrow)) row.names(graphic.data[[effect.names[k]]][[comp.names[l]]]) <- row.names(table) } if (length(effect.json[[effect.names[k]]]) == 0) { effect.json[[effect.names[k]]][[comp.names[l]]] <- list() effect.json[[effect.names[k]]][[comp.names[l]]] <- c(1:nrow) } group.names <- unlist(strsplit(row.names(stat[[k]])[l], split='-')) if (length(group.names) != 2) { PrintMsg('"error":"Unable to determine groups for fold-change analysis. Make sure no - or : character is present in the mapping file."', verbose) } group1 <- ExtractGroupFromMap(map=map, column=names(stat)[k], group=group.names[1], verbose) group2 <- ExtractGroupFromMap(map=map, column=names(stat)[k], group=group.names[2], verbose) fc <- ComputeFoldChange(as.vector(unlist(table[j, group1 == 1])), as.vector(unlist(table[j, group2 == 1])), names=group.names, verbose=v) effect.json[[effect.names[k]]][[comp.names[l]]][j] <- paste('{"mean abundance":"', fc$ab, '","log2(fold-change)":"', fc$logfc, '","-log2(p-value)":"', -log2(p[l]), '"}', sep='') graphic.data[[effect.names[k]]][[comp.names[l]]]$logfc[j] <- fc$logfc graphic.data[[effect.names[k]]][[comp.names[l]]]$ab[j] <- fc$ab } } v <- FALSE } for (k in 1:length(effect.json)) { for (l in 1:length(effect.json[[k]])) { effect.json[[k]][[l]] <- paste('"', names(effect.json[[k]])[l], '":[', paste(effect.json[[k]][[l]], collapse=','), ']', sep='') if (graphical == TRUE) { plot(graphic.data[[k]][[l]]$ab, graphic.data[[k]][[l]]$logfc, main=names(effect.json[[k]])[l], xlab='mean abundance', ylab='log2(fold-change)') } } effect.json[[k]] <- paste('"', names(effect.json)[k], '":{', paste(effect.json[[k]], collapse=','), '}', sep='') } data.json[i] <- paste('"', model[i], '":{', paste(effect.json, collapse=','), '}', sep='') } return(paste('{"data":{', paste(data.json, collapse=','), '},"names":["', paste(row.names(table), collapse='","'), '"]}', sep='')) } ################ # Build a heatmap of abundances # Args: # (1) table (numeric dataframe) Count table # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) model (character vector) A formula representing the model for stats. 
Terms # must refer to names(map) # (4) metadata (numeric dataframe) Metadata table # (5) fun (character) Function to use in hierarchical clustering # (6) json (character) Json string specifying packages # (7) verbose (character) Print messages? see PrintMsg() options # (8) graphical (logical) Generate graphics? # Returns: # Figure data as a json string # Output to device: # A figure # Prints to stdout: # A description of the function ################ BuildHeatMap <- function(table=NULL, map=NULL, stats='anova', model=NULL, secondary=NULL, fun='spearman', json=libJson, verbose=TRUE, graphical=TRUE) { suppressMessages(library('gplots')) PrintMsg('"description":"This is a heat-map of abundances."', verbose=verbose) # Check that there is no constant data RemoveConstantRows(table=table, verbose=TRUE) if ((nrow(table) < 3) || (ncol(table) < 3)) { PrintMsg('"error":"Not enough data to draw a heatmap (min 3 rows and 3 columns)."', TRUE, TRUE) } # Center/scale by row data <- as.data.frame(Transpose(apply(table, 1, cScale))) names(data) <- names(table) row.names(data) <- row.names(table) PrintMsg('"description":"Abundances are scaled on standard deviation and centered on mean per row."', verbose=verbose) # keep only columns of map that are used in the model map <- FilterMap(map=map, model=model) # Set function for hierarchical clustering DistFun <- function(x) { return (ComputeDistance(table=x, fun=fun, json=json, verbose=FALSE)) } if (graphical == FALSE) { pdf(file = NULL) } # Set the color palette col <- colorRampPalette(c("navy", "seashell", "red2"))(n=51) heatmap.out <- heatmap.2(as.matrix(data), scale='row', na.rm=TRUE, Rowv=TRUE, Colv=TRUE, dendrogram='both', #distfun=DistFun, col=col, symm=FALSE, symkey=FALSE, key=TRUE, keysize=1.5, density.info='density', trace='none', labCol=names(data), labRow=row.names(data), cexCol=0.2+1/log10(ncol(data)), mar=c(5, 10)) if (graphical == FALSE) { dev.off() } # Order data according to dendrogram data <- data[heatmap.out$rowInd, ] data <- data[, heatmap.out$colInd] names <- names(map) map <- as.data.frame(map[heatmap.out$colInd, ]) names(map) <- names if (!is.null(secondary)) { secondary <- secondary[, heatmap.out$colInd] } # Store heatmap data into a json string ncol <- ncol(data) heatmap.json <- c(1:ncol) data.names <- names(data) for (i in 1:ncol) { heatmap.json[i] <- paste('{"name":"', data.names[i], '","value":["', paste(data[, i], collapse='","'), '"]}', sep='') } # Add categories to topbar topbar.json <- list() ncateg <- ncol(map) category.json <- c(1:ncateg) categ.names <- names(map) for (i in 1:ncateg) { category.json[i] <- paste('{"name":"', categ.names[i], '","value":["', paste(map[, i], collapse='","'), '"]}', sep='') } topbar.json[['category']] <- paste('"category":[', paste(category.json, collapse=','), ']', sep='') # Add p-values to sidebar sidebar.json <- list() if (is.null(model)) { model <- paste(names(map), sep='+') } nmodel <- length(model) stat.json <- c(1:nmodel) for (i in 1:nmodel) { if (length(stats) != length(model)) { method = stats[1] } else { method = stats[i] } nrow <- nrow(data) v <- verbose p.value <- list() for (j in 1:nrow) { stat <- ComputeStats(response=as.vector(unlist(data[j, ])), map=map, method=method, model=model[i], pairwise=FALSE, verbose=v)$summary stat.rownames <- row.names(stat)[row.names(stat) != 'Residuals'] stat <- as.data.frame(stat[row.names(stat) != 'Residuals', ]) nstat <- nrow(stat) for (k in 1:nstat) { if (is.null(p.value[[stat.rownames[k]]])) { p.value[[stat.rownames[k]]] <- c(1:nrow) } 
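      # For each heatmap row j, the p-value of the k-th non-residual model term is
      # stored below in p.value[[term]]; once every row has been processed, these
      # vectors are serialized into the "p-values" entry of the sidebar JSON.
      # A hypothetical shape, assuming a single term 'treatment' and a 3-row table:
      #   p.value <- list(treatment=c(0.031, 0.472, 0.120))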
p.value[[stat.rownames[k]]][j] <- stat[k, ] } v <- FALSE } for (j in 1:length(p.value)) { p.value[[j]] <- paste('{"name":"', names(p.value)[j], '","value":["', paste(p.value[[j]], collapse='","'), '"]}', sep='') } stat.json[i] <- paste('"', model[i], '":[', paste(p.value, collapse=','), ']', sep='') } sidebar.json[['p-values']] <- paste('"p-values":{', paste(stat.json, collapse=','), '}', sep='') # Add correlations with secondary dataset to sidebar if (! is.null(secondary)) { PrintMsg('"description":"The correlations between the primary and secondary datasets were calculated using the Spearman rank correlation."', verbose=verbose) correlation <- as.data.frame(cor(x=t(data), y=t(secondary), use='na.or.complete', method ="spearman")) ncor <- ncol(correlation) cor.json <- c(1:ncor) for (i in 1:ncor) { cor.json[i] <- paste('{"name":"', names(correlation)[i], '","value":["', paste(correlation[, i], collapse='","'), '"]}', sep='') } sidebar.json[['correlations']] <- paste('"correlations":[', paste(cor.json, collapse=','), ']', sep='') } data.json <- paste('{"heatmap":[', paste(heatmap.json, collapse=','), '],"colnames":["', paste(names(data), collapse='","'), '"],"rownames":["', paste(row.names(data), collapse='","'), '"],"topbar":{', paste(topbar.json, collapse=','), '},"sidebar":{', paste(sidebar.json, collapse=','), '},"colDendrogram":', dendrogram2json(heatmap.out$colDendrogram)[[1]], ',"rowDendrogram":', dendrogram2json(heatmap.out$rowDendrogram)[[1]], '}', sep='') return (data.json) } ################ # Get the groups with the highest mean # Args: # (1) data (numeric vector) The data # (2) map (data-frame) Experimental design table with nrow=length(data) # (3) model (character) A formula representing the model for stats. Terms # must refer to names(map) # (4) verbose (character) Print messages? see PrintMsg() options # Returns: # A character vector with length=numer of terms in model # Prints to stdout: # A description of the function ################ getGroupWithHighestMean <- function(data=NULL, map=NULL, column=NULL, verbose=TRUE) { PrintMsg('"description":"Determine groups with highest mean abundance."', verbose=verbose) map <- as.data.frame(map[, unlist(strsplit(column, split=':'))]) if (ncol(map) == 1) { names(map) <- column } group <- as.data.frame(unique(map)) if (ncol(group) == 1) { names(group) <- column } mean <- c(1:nrow(group)) for (i in 1:nrow(group)) { keep <- 1 for (j in 1:ncol(map)) { keep <- keep*(map[, j] %in% group[i, names(map)[j]]) } mean[i] <- mean(unlist(data[as.logical(keep)])) } out <- list() out$value <- max(mean) out$id <- paste(unlist(group[which(mean == max(mean)), ]), collapse=':') out$group <- apply(group, 1, paste, collapse=':') return (out) } ################ # Build a network of correlations for variables # Args: # (1) table (numeric dataframe) Count table # (2) map (data-frame) Experimental design table with nrow=ncol(table) # (3) model (character) A formula representing the model for stats. Terms # must refer to names(map) # (4) metadata (numeric dataframe) Metadata table # (5) fun (character) Distance, correlation or similarity function # (6) json (character) Json string specifying packages # (7) verbose (character) Print messages? 
see PrintMsg() options # Returns: # Figure data as a json string # Prints to stdout: # A description of the function ################ BuildCorrelationNetwork <- function(table=NULL, map=NULL, stats='anova', model=NULL, secondary=NULL, fun='spearman', json=libJson, verbose=TRUE) { spec <- as.data.frame(do.call('rbind', json$correlation)) spec <- spec[spec$value == fun, ] pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=',')) package <- '' if (length(pkg) > 0) { for (i in 1:length(pkg)) { suppressMessages(library(pkg[i], character.only=TRUE)) } package <- paste(' (R package {', unlist(spec$pkg), '})', sep='') } PrintMsg(paste('"description":"This is a correlation network. It was built using ', spec$label, package, '."', sep=''), verbose=verbose) suppressMessages(library(igraph)) # keep only columns of map that are used in the model map <- FilterMap(map=map, model=model) # Combine with secondary dataset data <- rbind(table, secondary) data.type <- rep('primary dataset', nrow(data)) if (!is.null(secondary)) { data.type[(nrow(table)+1):nrow(data)] <- "secondary dataset" } # Check that there is no constant data RemoveConstantRows(table=data, verbose=TRUE) if ((nrow(data) < 3) || (ncol(data) < 3)) { PrintMsg('"error":"Not enough data to compute a network (min 3 rows and 3 columns)."', TRUE, TRUE) } # Calculate the correlation matrix weight <- ComputeCorrelation(table=data, fun=fun, test=TRUE, json=json, verbose=FALSE) nmodel <- length(model) nnodes <- nrow(data) node.json <- c(1:nnodes) link.json <- c(1:(nnodes*(nnodes/2-1))) l <- 1 legend.json <- list() for (i in 1:nnodes) { model.json <- c(1:nmodel) for (j in 1:nmodel) { + if (length(stats) != length(model)) { + method = stats[1] + } else { + method = stats[i] + } stat <- ComputeStats(response=as.vector(unlist(data[i, ])), map=map, - method=stats, model=model[j], pairwise=FALSE, + method=method, model=model[j], pairwise=FALSE, verbose=FALSE)$summary neffect <- length(row.names(stat)[row.names(stat) != 'Residuals']) effect.json <- c(1:neffect) if (length(legend.json[[model[j]]]) == 0) { legend.json[[model[j]]] <- c(1:neffect) } for (k in 1:neffect) { gwhm <- getGroupWithHighestMean(data=data[i, ], map=map, column=row.names(stat)[k], verbose=FALSE) effect.json[k] <- paste('"', row.names(stat)[k], '":{"p-value":"', stat[k, ], '","highest-mean":"', gwhm$id, '"}', sep='') if (i == 1) { legend.json[[model[j]]][k] <- paste('"', row.names(stat)[k], '":["', paste(gwhm$group, collapse='","'), '"]', sep='') } } model.json[j] <- paste('"', model[j], '":{', paste(effect.json, collapse=','), '}', sep='') if (i == 1) { legend.json[[model[j]]] <- paste('"', model[j], '":{', paste(legend.json[[model[j]]], collapse=','), '}', sep='') } } node.json[i] <- paste('{"id":"', i-1, '","name":"', row.names(data)[i], '","data-type":"', data.type[i], '","stat":{', paste(model.json, collapse=','), '},"mean":"', mean(unlist(data[i, ])), '","min":"', min(unlist(data[i, ])), '","max":"', max(unlist(data[i, ])), '"}', sep='') if (i < nnodes) { for (j in (i+1):nnodes) { link.json[l] <- paste('{"source":"', i-1, '","target":"', j-1, '","weight":"', weight$estimate[i, j], '","p-value":"', weight$p.value[i, j], '"}', sep='') l <- l+1 } } } return (paste('{"nodes":[', paste(node.json, collapse=','), '],"links":[', paste(link.json, collapse=','), '],"legend":{', paste(unlist(legend.json), collapse=','), '}}', sep='')) } ################ # Build a network of similarity for samples # Args: # (1) tables (list of numeric dataframes) # (2) map (data-frame) Experimental design 
table with nrow=ncol(table) # (3) model (character) A formula representing the model for stats. Terms # must refer to names(map) # (4) funs (list of characters) Distance or similarity functions # (5) clust (character) Network clustering algorithm # (6) json (character) Json string specifying packages # (7) verbose (character) Print messages? see PrintMsg() options # (8) lib (character) Path to holynet library # Returns: # List [1] json string (fusion network) [2] data-frame (clusters) # Prints to stdout: # A description of the function ################ BuildSimilarityNetwork <- function(table=NULL, map=NULL, funs=NULL, clust="walktrap", clust.names=NULL, json=libJson, verbose=TRUE, lib='holynetlib.R') { source(lib) spec <- as.data.frame(do.call('rbind', json$distance)) for (i in 1:length(table)) { fun <- funs[1] if ((length(funs) == length(table)) && (length(funs) > 1)) { fun <- funs[i] } spec <- spec[spec$value == fun, ] pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=',')) package <- '' if (length(pkg) > 0) { for (j in 1:length(pkg)) { suppressMessages(library(pkg[j], character.only=TRUE)) } package <- paste(' (R package {', unlist(spec$pkg), '})', sep='') } PrintMsg(paste('"description":"This is a similarity network. It was built using ', spec$label, package, '."', sep=''), verbose=verbose) } # Calculate similarity matrices mat <- list() for (i in 1:length(table)) { fun <- funs[1] if (length(funs) == length(table)) { fun <- funs[i] } mat[[i]] <- CreateAdjacencyMatrix(CreateSimilarityMatrix(table[[i]], fun)) } # If multiple matrices exist, fuse them if (length(mat) > 1) { fusmat <- FuseMatrices(mat) if (is.null(fusmat)) { PrintMsg('"error":"Trying to fuse matrices but dimensions are not matching."', TRUE, TRUE) } mat[[(length(mat)+1)]] <- fusmat } out <- list() # Compute clustering on similarity matrices if (!is.null(clust) && (clust != 'none')) { spec <- as.data.frame(do.call('rbind', json$graph_clustering)) spec <- spec[spec$value == clust, ] pkg <- unlist(strsplit(as.character(unlist(spec$pkg)), split=',')) package <- '' if (length(pkg) > 0) { for (i in 1:length(pkg)) { suppressMessages(library(pkg[i], character.only=TRUE)) } package <- paste(' (R package {', unlist(spec$pkg), '})', sep='') } PrintMsg(paste('"description":"Clusters were determined using the ', spec$label, package, '."', sep=''), verbose=verbose) clusters <- matrix(ncol=length(mat), nrow=nrow(mat[[1]])) for (i in 1:length(mat)) { clusters[, i] <- paste(clust, as.character(ApplyNetworkClustering(CreateNetwork(mat[[i]]), clust_method=clust)), sep='') } clusters <- as.data.frame(clusters) if (!is.null(clust.names)) { names(clusters) <- clust.names } out[['txt']] <- clusters map <- cbind(map, clusters) } # Pack data into a json string json.data <- buildFusionNetworkJson(mat, map) out[['json']] <- json.data return (out) } ################################# # (\/)(°,,,°)(\/) # ################################# libJson <- ReadJson() diff --git a/lib/genocrunch_console/lib/genocrunchlib.py b/lib/genocrunch_console/lib/genocrunchlib.py index ed9dd68..a5395d0 100755 --- a/lib/genocrunch_console/lib/genocrunchlib.py +++ b/lib/genocrunch_console/lib/genocrunchlib.py @@ -1,1008 +1,1018 @@ # -*- coding: utf-8 -*- #genocrunchlib.py import os, sys, argparse, tarfile from json import load, loads, dump from subprocess import Popen, PIPE from shutil import copyfile, move, rmtree from itertools import izip from collections import OrderedDict from tempfile import mkdtemp, NamedTemporaryFile from random import choice from string 
import ascii_uppercase, digits from copy import copy from datetime import datetime __dir__ = os.path.dirname(__file__) main_dir = '/'.join(__dir__.split('/')[0:len(__dir__.split('/'))-1]) def Error(f=__file__, msg='', to_rm=[]): print 'Error ['+ os.path.basename(f) +']: '+ msg if len(to_rm) > 0: for t in to_rm: rmtree(t) raise SystemExit(1) def Warning(f=__file__, msg=''): print 'Warning ['+ os.path.basename(f) +']: '+ msg def mkdir(dp=''): """Create a directory if not already present Args: dp Directory path """ if not os.path.exists(dp): os.makedirs(dp) return 0 def rand(n=1): """Generate a random string n Length of the random string """ return ''.join(choice(ascii_uppercase + digits) for _ in range(n)) class Parameters(object): """Parameters retrieved from genocrunchlib.json, from user's json and passed in command line""" def __init__(self, json_fp=''): """Populate the object json_fp Json file path """ # Set authorized arguments for cli argument_parser = argparse.ArgumentParser() argument_parser._action_groups.pop() required_arguments = argument_parser.add_argument_group('required arguments') optional_arguments = argument_parser.add_argument_group('optional arguments') optional_arguments.add_argument('--params_fp', help='Path to a JSON file containing parameters.', nargs=1, type=str) optional_arguments.add_argument('--analysis', help='List of analysis to perform (comma-separated). Valid choices: proportions,diversity,adonis,pca,pcoa,heatmap,change,correlation_network,clustering,similarity_network', nargs=1, default='proportions,pcoa', type=str) required_arguments.add_argument('--output', help='Path to output directory.', nargs=1, type=str) # Define parameters based on the json self.json_fp = json_fp with open(self.json_fp, 'r') as f: self.json_s = load(f) self.fps = [] required = [] for key, value in self.json_s.get('fields').iteritems(): for val in value: if 'scope' not in val.keys() or val.get('scope') != 'form_only': if 'default' in val.keys(): default = val.get('default') else: default=None # Add authorized cli arguments from the json if 'optional' in val.keys() and val.get('optional') == False: required.append(val.get('id')) required_arguments.add_argument('--'+val.get('id'), help=val.get('help'), nargs=1, default=default, type=str) else: optional_arguments.add_argument('--'+val.get('id'), help=val.get('help'), nargs=1, default=default, type=str) if val.get('type') == 'file': self.fps.append(val.get('id')) # Retrieve cli arguments self.params = argument_parser.parse_args().__dict__ # Set params values from user's json if provided (does not overwrite parameters passed in command line) # Careful with booleans as form checkboxes return 1 or nothing... 
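# A params_fp file is a flat JSON object whose keys are the field ids declared in
# genocrunchlib.json, for example (illustrative values):
#   {"primary_dataset": "counts.txt", "map": "map.txt", "prim_rarefaction": 1}
# Command-line values win over the file; list-valued argparse results are unpacked
# to scalars first in the loop below.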
for key, value in self.params.iteritems(): if isinstance(value, list): self.params[key] = value[0] if self.params['params_fp'] is not None and os.path.exists(self.params['params_fp']): with open(self.params.get('params_fp'), 'r') as f: params = load(f) for key, value in params.items(): if '--'+key not in sys.argv[1:]: if value in ['', [], ['']]: self.params[key] = None else: self.params[key] = value # Handle booleans for e in ['prim_rarefaction', 'sec_rarefaction']: if e not in params.keys(): self.params[e] = False elif params[e] in ['', ' ', '0']: self.params[e] = False else: self.params[e] = bool(params[e]) if '--analysis' in sys.argv[1:]: for a in self.params['analysis'].split(','): self.params[a] = 1 self.params.pop('analysis') for key, value in self.params.iteritems(): if value is not None: if isinstance(value, str) and len(value.split(',')) > 1: self.params[key] = value.split(',') elif key in required: Error(msg='argument --'+key+' is required') # Check that all files exist missing = [] for fp in self.fps: if self.params[fp] is not None and not os.path.exists(self.params[fp]): missing.append('--'+fp+' '+self.params[fp]) if len(missing) > 0: Error(msg='The following file(s) were not found: '+', '.join(missing)) def write(self, fp=''): """Write all parameters to a file fp File path """ with open(fp, 'w') as f: dump(self.params, f, indent=2) def table2R(fp): """Adapt file format for easy loading in R: Remove comments from the top of the file (also empty first element of first line) """ sub = Popen([main_dir+'/bin/modify_table.sh', fp, 'table2R', fp]) sub.wait() def stdoutLog2json(stdout): """Convert stdout to json string""" return loads('[{'+'"},{"'.join(stdout.read().split('""'))+'}]') class Log(object): """A log file""" def __init__(self, fp): self.fp = fp self.data = [] self.update() def update(self): """Write log data to file""" with open(self.fp, 'w') as f: dump(self.data, f, indent=2) self.secure(self.fp+'.safe') def secure(self, fp=None): """Remove absolute paths from the log""" if fp is None: fp = self.fp path_to_hide = os.path.dirname(self.fp) f = open(self.fp, 'r') content = f.read() f.close() new_content = content.replace(path_to_hide,'') f = open(fp, 'w') f.write(new_content) f.close() def wrapup(self): """Set pending and running statuses to failed and update the .safe copy""" f = open(self.fp, 'r') content = f.read() f.close() new_content = content.replace('running','failed').replace('pending','failed') f = open(self.fp, 'w') f.write(new_content) f.close() self.secure(self.fp+'.safe') class Archive(object): """An archive""" def __init__(self, target_fp='', name=None): self.fp = target_fp self.source = [] self.add(name) def add(self, name=None, update=False, generate=True): """Add files to the archive""" if name is not None: if isinstance(name, list): self.source = list(set(self.source + name)) else: self.source = list(set(self.source + [name])) if update and generate: self.update() elif generate: with tarfile.open(self.fp, "w:gz") as archive: archive.add(name) def update(self): """Update files in archive""" if os.path.exists(self.fp): tmp = NamedTemporaryFile(delete=False, dir=os.path.dirname(self.fp)) os.rename(self.fp, tmp.name) with tarfile.open(self.fp, "w:gz") as archive: for e in self.source: archive.add(e) class Map(object): """Mapping file""" def __init__(self, fp, output): self.fp = output+'/map.txt' copyfile(fp, self.fp) table2R(self.fp) self.log = [] self.log.append({'name':'file', 'type':'file', 'path':self.fp}) self.log.append({'name':'validation', 'type':'validation',
'status':'pending', 'messages':[]}) def validate(self): log = [e for e in self.log if e['name'] == 'validation'][0] log['status'] = 'running' start_time = datetime.now() log['start_time'] = str(start_time) sub = Popen([main_dir+'/bin/validate_format.R', '-m', 'validate_map', '--map', self.fp], stdout=PIPE, stderr=PIPE) sub.wait() log['status'] = 'completed' log['messages'] = stdoutLog2json(sub.stdout) if len([e for e in log['messages'] if 'error' in e.keys()]) > 0: log['status'] = 'failed' Error(msg='Error detected in '+self.fp) log['status'] = 'completed' log['execution_time'] = str(datetime.now() - start_time) class DataSet(object): """Dataset""" _pipeline = [{'name':'pre-processing', 'fun':'preProcess', 'operations':[ 'filtering', 'binning' ]}, {'name':'transformation', 'fun':'transform', 'operations':[ 'rarefaction', 'transformation', 'batch_effect_suppression' ]} ] def __init__(self, json_fp = '', pipeline = [], data_fp = '', output = '', map = '', category_column = '', abundance_threshold = '', abundance_threshold_type = '', presence_threshold = '', presence_threshold_type = '', bin_levels = [], bin_fun = '', rarefy = False, rarefaction_depth = '', nsampling = '', transformation = '', batch_effect = '', batch_effect_fun = ''): self.json_fp = json_fp self.pipeline = self._pipeline if pipeline != []: self.pipeline = pipeline self.output = output self.data_fp = [self.output+'/'+os.path.basename(data_fp)] self.tmpdir = os.path.normpath(self.output+'/../'+mkdtemp()) mkdir(self.tmpdir) copyfile(data_fp, self.data_fp[0]) table2R(self.data_fp[0]) self.map = map self.category_column = category_column self.abundance_threshold = abundance_threshold self.abundance_threshold_type = abundance_threshold_type self.presence_threshold = presence_threshold self.presence_threshold_type = presence_threshold_type self.bin_levels = bin_levels self.bin_fun = bin_fun self.rarefy = rarefy self.rarefaction_depth = rarefaction_depth self.nsampling = nsampling self.transformation = transformation self.batch_effect = batch_effect self.batch_effect_fun = batch_effect_fun self.log = [] self.log.append({'name':'file', 'type':'file', 'path':self.data_fp[0]}) self.log.append({'name':'validation', 'type':'validation', 'status':'pending', 'messages':[]}) self.log.append({'name':'sorting', 'type':'operation', 'status':'pending', 'messages':[]}) for p in self.pipeline: op = [] for e in p['operations']: op.append({'name':e, 'status':'pending', 'messages':[]}) self.log.append({'name':p['name'], 'type':'step', 'operations':op}) op = [] self.stdout = [] self.stderr = [] def validate(self): log = [e for e in self.log if e['name'] == 'validation'][0] log['status'] = 'running' start_time = datetime.now() log['start_time'] = str(start_time) for fp in self.data_fp: sub = Popen([main_dir+'/bin/validate_format.R', '-t', fp, '-m', 'validate_dataset', '--map', self.map.fp, '--category_column', str(self.category_column)], stdout=PIPE, stderr=PIPE) sub.wait() log['messages'].extend(stdoutLog2json(sub.stdout)) log['status'] = 'completed' log['execution_time'] = str(datetime.now() - start_time) def sort(self): log = [e for e in self.log if e['name'] == 'sorting'][0] log['status'] = 'running' start_time = datetime.now() log['start_time'] = str(start_time) tmp = NamedTemporaryFile(delete=False, dir=self.tmpdir) for fp in self.data_fp: sub = Popen([main_dir+'/bin/modify_table.R', '-t', fp, '-m', 'sorting', '--map', self.map.fp, '--ignore', str(self.category_column), '-o', fp, '--log', str(tmp.name)], stdout=PIPE, stderr=PIPE) sub.wait() 
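# The sorting call writes its structured messages to the temporary --log file;
# they are merged into this step's log entry below, while the R script's raw
# stdout/stderr are buffered on the dataset via appendStdout/appendStderr.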
self.appendStdout(sub.stdout.read()) self.appendStderr(sub.stderr.read()) if os.path.exists(tmp.name): with open(tmp.name, 'r') as t: log['messages'].extend(stdoutLog2json(t)) os.remove(tmp.name) tmp.close() log['status'] = 'completed' log['execution_time'] = str(datetime.now() - start_time) def preProcess(self, fun=''): log = [e for e in [o for o in self.log if o['name'] == 'pre-processing'][0]['operations'] if e['name'] == fun][0] log['status'] = 'running' start_time = datetime.now() log['start_time'] = str(start_time) tmp = NamedTemporaryFile(delete=False, dir=self.tmpdir) args = [main_dir+'/bin/modify_table.R', '-m', fun, '--log', str(tmp.name)] if fun == 'filtering': if (self.abundance_threshold is None or self.abundance_threshold == 0) and (self.presence_threshold is None or self.presence_threshold == 0): log['status'] = 'skipped' return(0) args.extend(['--column', str(self.category_column), '--abundance_threshold', self.abundance_threshold, '--abundance_threshold_type', self.abundance_threshold_type, '--presence_threshold', self.presence_threshold, '--presence_threshold_type', self.presence_threshold_type]) elif fun == 'binning': if self.bin_levels is None: log['status'] = 'skipped' return(0) args.extend(['--column', str(self.category_column), '--fun', self.bin_fun]) output_fps = [] for fp in self.data_fp: args.extend(['-t', fp]) if fun == 'filtering': output_fp = fp +'_filtered.txt' output_fps.append(output_fp) args.extend(['-o', output_fp]) sub = Popen(args, stdout=PIPE, stderr=PIPE) sub.wait() self.appendStdout(sub.stdout.read()) self.appendStderr(sub.stderr.read()) elif fun == 'binning': vstyle = 2 if len(self.bin_levels) > 1 else 1 for level in self.bin_levels: output_fp = fp+'_lvl'+level+'.txt' output_fps.append(output_fp) args.extend(['--level', level, '--vstyle', str(vstyle), '-o', output_fp]) sub = Popen(args, stdout=PIPE, stderr=PIPE) sub.wait() self.appendStdout(sub.stdout.read()) self.appendStderr(sub.stderr.read()) vstyle = 3 if os.path.exists(tmp.name): with open(tmp.name, 'r') as t: log['messages'].extend(stdoutLog2json(t)) os.remove(tmp.name) tmp.close() log['status'] = 'completed' log['execution_time'] = str(datetime.now() - start_time) self.data_fp = output_fps def transform(self, fun=''): log = [e for e in [o for o in self.log if o['name'] == 'transformation'][0]['operations'] if e['name'] == fun][0] log['status'] = 'running' start_time = datetime.now() log['start_time'] = str(start_time) tmp = NamedTemporaryFile(delete=False, dir=self.tmpdir) args = [main_dir+'/bin/modify_table.R', '--log', str(tmp.name), '--ignore', str(self.category_column)] if fun == 'rarefaction': if self.rarefy is None or self.rarefy == False: log['status'] = 'skipped' return(0) args.extend(['-m', 'rarefaction', '--sample', self.rarefaction_depth, '--nsampling', self.nsampling]) elif fun == 'transformation': if self.transformation is None or self.transformation in ['none', '', ' ']: log['status'] = 'skipped' return(0) args.extend(['-m', self.transformation]) elif fun == 'batch_effect_suppression': if self.batch_effect is None: log['status'] = 'skipped' return(0) args.extend(['-m', 'batch_effect_suppression', '--map', self.map.fp, '--effect', self.batch_effect, '--fun', self.batch_effect_fun]) suffix = {'rarefaction':'rar', 'transformation':'trans', 'batch_effect_suppression':'bes'} output_fps = [] vstyle = 1 for fp in self.data_fp: output_fp = ('.').join(fp.split('.')[:-1])+'_'+suffix[fun]+'.txt' output_fps.append(output_fp) args.extend(['-t', fp, '--vstyle', str(vstyle), '-o', output_fp]) 
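# For the rarefaction branch, args now resembles (flags taken from above, paths illustrative):
#   bin/modify_table.R --log <tmp> --ignore <category_column> -m rarefaction --sample <depth>
#   --nsampling <n> -t <input>.txt --vstyle 1 -o <input>_rar.txt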
sub = Popen(args, stdout=PIPE, stderr=PIPE) sub.wait() self.appendStdout(sub.stdout.read()) self.appendStderr(sub.stderr.read()) vstyle = 2 if os.path.exists(tmp.name): with open(tmp.name, 'r') as t: log['messages'].extend(stdoutLog2json(t)) os.remove(tmp.name) tmp.close() log['status'] = 'completed' log['execution_time'] = str(datetime.now() - start_time) self.data_fp = output_fps def appendStdout(self, stdout): if stdout is not None and stdout != '': self.stdout.append(stdout) def appendStderr(self, stderr): if stderr is not None and stderr != '': self.stderr.append(stderr) class Analysis(object): """THE ANALYSIS""" def __init__(self, json_fp): """initialize the analysis pipeline json_fp File path to json """ # INITIALIZE THE PIPELINE self.pipeline = [{'name':'diversity', 'before_step':'transformation', 'status':'pending', 'messages':[]}, {'name':'clustering', 'before_step':'analysis', 'status':'pending', 'messages':[]}, {'name':'proportions', 'status':'pending', 'messages':[]}, {'name':'adonis', 'status':'pending', 'messages':[]}, {'name':'pca', 'status':'pending', 'messages':[]}, {'name':'ca', 'status':'pending', 'messages':[]}, {'name':'pcoa', 'status':'pending', 'messages':[]}, {'name':'cca', 'status':'pending', 'messages':[]}, {'name':'heatmap', 'status':'pending', 'messages':[]}, {'name':'change', 'status':'pending', 'messages':[]}, {'name':'correlation_network', 'status':'pending', 'messages':[]}, {'name':'similarity_network', 'status':'pending', 'messages':[]}] # Set parameters from json self.parameters = Parameters(json_fp) # Make the output directory self.output = self.parameters.params['output'] if os.path.exists(self.output): rmtree(self.output) mkdir(self.output) self.tmpdir = os.path.normpath(self.output+'/../'+mkdtemp()) mkdir(self.tmpdir) self.to_rm = [self.tmpdir] # Set the archive #self.archive = Archive(self.output+'/output.tar.gz') # Store parameters to output directory self.parameters.write(self.output+'/params.json') # Start logging self.log = Log(self.output+'/log.json') self.stdout = Log(self.output+'/stdout.log') self.stderr = Log(self.output+'/stderr.log') def run(self): """Run the analysis""" # Create data sets self.map = Map(self.parameters.params['map'], self.output) self.log.data.append({'name':'map', 'type':'map', 'log':self.map.log}) self.log.update() #self.archive.add(self.log.fp+'.safe', update=False, generate=False) log = [e for e in self.map.log if e['name'] == 'validation'][0] log['status'] = 'running' self.log.update() self.map.validate() if len([e for e in log['messages'] if 'error' in e.keys()]) > 0: log['status'] = 'failed' self.log.update() Error(msg='Error detected in '+self.map.fp, to_rm=self.to_rm) self.log.update() if self.parameters.params['bin_levels'] is None or isinstance(self.parameters.params['bin_levels'], list): bin_levels = self.parameters.params['bin_levels'] else: bin_levels = self.parameters.params['bin_levels'].split(',') self.primary_dataset = DataSet(json_fp = self.parameters.json_fp, data_fp = self.parameters.params['primary_dataset'], output = self.output, map = self.map, category_column = self.parameters.params['category_column'], abundance_threshold = self.parameters.params['abundance_threshold'], abundance_threshold_type = self.parameters.params['abundance_threshold_type'], presence_threshold = self.parameters.params['presence_threshold'], presence_threshold_type = self.parameters.params['presence_threshold_type'], bin_levels = bin_levels, bin_fun = self.parameters.params['bin_fun'], rarefy = 
bool(self.parameters.params['prim_rarefaction']), rarefaction_depth = self.parameters.params['prim_sampling_depth'], nsampling = self.parameters.params['prim_nsampling'], transformation = self.parameters.params['prim_trans_method'], batch_effect = self.parameters.params['prim_batch_effect_suppression'], batch_effect_fun = self.parameters.params['prim_batch_effect_suppression_fun']) self.log.data.append({'name':'primary_dataset', 'type':'dataset', 'log':self.primary_dataset.log}) self.log.update() self.stdout.data.append({'primary_dataset':self.primary_dataset.stdout}) self.stderr.data.append({'primary_dataset':self.primary_dataset.stderr}) self.to_rm.append(self.primary_dataset.tmpdir) #self.archive.add(self.primary_dataset.data_fp, update=False, generate=False) log = [e for e in self.primary_dataset.log if e['name'] == 'validation'][0] log['status'] = 'running' self.log.update() self.primary_dataset.validate() if len([e for e in log['messages'] if 'error' in e.keys()]) > 0: log['status'] = 'failed' self.log.update() Error(msg='Error detected in primary_dataset.validate()', to_rm=self.to_rm) self.log.update() log = [e for e in self.primary_dataset.log if e['name'] == 'sorting'][0] log['status'] = 'running' self.log.update() self.primary_dataset.sort() self.stdout.update() self.stderr.update() if len([e for e in log['messages'] if 'error' in e.keys()]) > 0: log['status'] = 'failed' self.log.update() Error(msg='See'+self.log.fp+'.', to_rm=self.to_rm) self.log.update() if self.parameters.params['secondary_dataset'] is not None: self.secondary_dataset = DataSet(json_fp = self.parameters.json_fp, pipeline = [{'name':'transformation', 'fun':'transform', 'operations':['rarefaction', 'transformation', 'batch_effect_suppression']}], data_fp = self.parameters.params['secondary_dataset'], output = self.output, map = self.map, category_column = '', rarefy = bool(self.parameters.params['sec_rarefaction']), rarefaction_depth = self.parameters.params['sec_sampling_depth'], nsampling = self.parameters.params['sec_nsampling'], transformation = self.parameters.params['sec_trans_method'], batch_effect = self.parameters.params['sec_batch_effect_suppression'], batch_effect_fun = self.parameters.params['prim_batch_effect_suppression_fun']) self.log.data.append({'name':'secondary_dataset', 'type':'dataset', 'log':self.secondary_dataset.log}) self.log.update() self.stdout.data.append({'secondary_dataset':self.secondary_dataset.stdout}) self.stderr.data.append({'secondary_dataset':self.secondary_dataset.stderr}) self.to_rm.append(self.secondary_dataset.tmpdir) log = [e for e in self.secondary_dataset.log if e['name'] == 'validation'][0] log['status'] = 'running' self.log.update() self.secondary_dataset.validate() if len([e for e in log['messages'] if 'error' in e.keys()]) > 0: log['status'] = 'failed' self.log.update() Error(msg='Error detected in secondary_dataset.validate()', to_rm=self.to_rm) self.log.update() log = [e for e in self.secondary_dataset.log if e['name'] == 'sorting'][0] log['status'] = 'running' self.log.update() self.secondary_dataset.sort() self.stdout.update() self.stderr.update() if len([e for e in log['messages'] if 'error' in e.keys()]) > 0: log['status'] = 'failed' self.log.update() Error(msg='See'+self.log.fp+'.', to_rm=self.to_rm) self.log.update() # RUN DATASET PIPELINE self.log.data.append({'name':'analysis', 'type':'analysis', 'log':self.pipeline}) self.stdout.data.append({'analysis':[]}) self.stderr.data.append({'analysis':[]}) datasets = [self.primary_dataset] if 
self.parameters.params['secondary_dataset'] is not None: datasets.append(self.secondary_dataset) for dataset in datasets: for step in dataset.pipeline: if dataset == datasets[0]: analyses = [a for a in [e for e in self.pipeline if 'before_step' in e.keys()] if a['before_step'] == step['name']] for analysis in analyses: eval('self.analysis("'+analysis['name']+'")') for operation in step['operations']: log = [o for o in [ s for s in dataset.log if s['name'] == step['name']][0]['operations'] if o['name'] == operation][0] log['status'] = 'running' self.log.update() eval('dataset.'+step['fun']+'("'+operation+'")') self.stdout.update() self.stderr.update() if len([e for e in log['messages'] if 'error' in e.keys()]) > 0: log['status'] = 'failed' self.log.update() Error(msg='See'+self.log.fp+'.', to_rm=self.to_rm) self.log.update() # COMPLETE ANALYSIS PIPELINE analyses = [a for a in [e for e in self.pipeline if 'before_step' in e.keys()] if a['before_step'] == 'analysis'] for analysis in analyses: eval('self.analysis("'+analysis['name']+'")') analyses = [a for a in [e for e in self.pipeline if 'before_step' not in e.keys()]] for analysis in analyses: eval('self.analysis("'+analysis['name']+'")') self.cleanup() def cleanup(self): self.log.wrapup() self.stdout.update() self.stderr.update() #self.archive.update() for t in self.to_rm: rmtree(t) def analysis(self, method=''): """Run R scripts to generate figures and stats""" log = [e for e in [l for l in self.log.data if l['name'] == 'analysis'][0]['log'] if e['name'] == method][0] stdout = [ e for e in self.stdout.data if e.keys()[0] == 'analysis'][0]['analysis'] stderr = [ e for e in self.stderr.data if e.keys()[0] == 'analysis'][0]['analysis'] if method not in self.parameters.params.keys() or self.parameters.params[method] == 0: log['status'] = 'skipped' self.log.update() return(0) log['status'] = 'running' start_time = datetime.now() log['start_time'] = str(start_time) self.log.update() append_to_map = False output = self.output+'/'+method mkdir(output) tmp = NamedTemporaryFile(delete=False, dir=self.tmpdir) # Set general arguments args = [main_dir+'/bin/analyse_table.R', '--graphical', 'TRUE', '--log', str(tmp.name), '--method', method, '--map', self.map.fp, '--category', self.primary_dataset.category_column] # Set analysis-specific arguments if method == 'diversity': if self.parameters.params['diversity_metric'] is not None: args.extend(['--fun', ','.join(self.parameters.params['diversity_metric'])]) if self.parameters.params['compare_diversity'] is not None: args.extend(['--compare_diversity', self.parameters.params['compare_diversity']]) elif method == 'clustering': append_to_map = True if self.parameters.params['clustering_fun'] is not None: clustering_fun = self.parameters.params['clustering_fun'] if isinstance(clustering_fun, list): # NOT SUPPORTED BY R SCRIPT YET clustering_fun = ','.join(clustering_fun) args.extend(['--fun', clustering_fun]) elif method == 'adonis': if self.parameters.params['adonis_model'] is not None: adonis_model = self.parameters.params['adonis_model'] if isinstance(adonis_model, list): adonis_model = ','.join(adonis_model) args.extend(['--adonis_model', adonis_model]) if self.parameters.params['adonis_distfun'] is not None: adonis_distfun = self.parameters.params['adonis_distfun'] if isinstance(adonis_distfun, list): adonis_distfun = ','.join(adonis_distfun) args.extend(['--fun', adonis_distfun]) if self.parameters.params['adonis_strata'] is not None: adonis_strata = self.parameters.params['adonis_strata'] if 
isinstance(adonis_strata, list): adonis_strata = ','.join(adonis_strata) args.extend(['--strata', adonis_strata]) elif method == 'pcoa': pcoa_distfun = self.parameters.params['pcoa_distfun'] if pcoa_distfun is not None: if isinstance(pcoa_distfun, list): pcoa_distfun = ','.join(pcoa_distfun) args.extend(['--fun', str(pcoa_distfun)]) elif method == 'cca': if self.parameters.params['cca_categ'] is not None: cca_categ = self.parameters.params['cca_categ'] if isinstance(cca_categ, list): cca_categ = ','.join(cca_categ) args.extend(['--column', cca_categ]) else: log['messages'].append({'warning':'No categorical data was given in CCA. Please provide the name(s) of column(s) in map that contain(s) categorical data to be used in CCA.'}) log['status'] = 'skipped' self.log.update() return(0) elif method == 'heatmap' and self.parameters.params['secondary_dataset'] is not None: args.extend(['--secondary', self.secondary_dataset.data_fp[0]]) elif method == 'correlation_network': if self.parameters.params['correlation_network_fun'] is not None: correlation_network_fun = self.parameters.params['correlation_network_fun'] if isinstance(correlation_network_fun, list): correlation_network_fun = ','.join(correlation_network_fun) args.extend(['--fun', correlation_network_fun]) if self.parameters.params['secondary_dataset'] is not None: args.extend(['--secondary', self.secondary_dataset.data_fp[0]]) elif method == 'similarity_network': append_to_map = True if self.parameters.params['similarity_network_fun1'] is not None: fun = self.parameters.params['similarity_network_fun1'] if self.parameters.params['secondary_dataset'] is not None and self.parameters.params['similarity_network_fun2'] is not None: fun = fun+','+self.parameters.params['similarity_network_fun2'] args.extend(['--fun', fun]) if self.parameters.params['similarity_network_clust'] is not None: args.extend(['--clust', self.parameters.params['similarity_network_clust']]) if self.parameters.params['secondary_dataset'] is not None: args.extend(['--secondary', self.secondary_dataset.data_fp[0]]) if len(self.primary_dataset.data_fp) > 1: if len(self.primary_dataset.data_fp) != len(self.parameters.params['bin_levels']): Error(msg='Analysis cannot be performed at specified binning levels because corresponding data files were not found.', to_rm=self.to_rm) log['levels'] = log.pop('messages') for i in range(0, len(self.parameters.params['bin_levels'])): log['levels'].append({'level':self.parameters.params['bin_levels'][i], 'status':'pending', 'messages':[]}) for i in range(0, len(self.primary_dataset.data_fp)): l = log output_fp = output+'/'+method if len(self.primary_dataset.data_fp) > 1: l = [e for e in log['levels'] if e['level'] == self.parameters.params['bin_levels'][i]][0] output_fp = output_fp+'_level_'+self.parameters.params['bin_levels'][i] l['status'] = 'running' args_cp = copy(args) args_cp.extend(['-t', self.primary_dataset.data_fp[i], '-o', output_fp]) if self.parameters.params['model_type'] == 'basic': if isinstance(self.parameters.params['basic_model'], list): m = ','.join(self.parameters.params['basic_model']) else: m = self.parameters.params['basic_model'] args_cp.extend(['--model', m]) else: if isinstance(self.parameters.params['advanced_model'], list): m = ','.join(self.parameters.params['advanced_model']) else: m = self.parameters.params['advanced_model'] + + if isinstance(self.parameters.params['advanced_stats'], list): + s = ','.join(self.parameters.params['advanced_stats']) + else: + s = self.parameters.params['advanced_stats'] + 
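+ # Like the model, a list of stats is flattened to a comma-separated string for
+ # analyse_table.R (e.g. ['anova', 'anova'] -> 'anova,anova'; values illustrative).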
args_cp.extend(['--stats', - self.parameters.params['advanced_stats'], + s, '--model', m]) sub = Popen(args_cp, stdout=PIPE, stderr=PIPE) sub.wait() s = sub.stdout.read() + #print s if s is not None and s != '': stdout.append(s) s = sub.stderr.read() if s is not None and s != '': stderr.append(s) if os.path.exists(tmp.name): with open(tmp.name, 'r') as t: l['messages'].extend(stdoutLog2json(t)) os.remove(tmp.name) tmp.close() self.log.update() if append_to_map: if os.path.exists(output_fp+'.txt') and os.stat(output_fp+'.txt').st_size != 1: tmpmap = NamedTemporaryFile(delete=False, dir=self.tmpdir) with open(tmpmap.name, 'w') as new, open(self.map.fp) as m, open(output_fp+'.txt') as c: i = 0 for line_m, line_c in zip(m, c): to_add = line_c.rstrip().split('\t') to_add.pop(0) if i == 0: while to_add in line_m.rstrip().split('\t'): to_add = to_add+'_'+rand(3) model_to_add = to_add new.write('%s\t%s\n' % (line_m.rstrip(), '\t'.join(to_add))) i = i+1 move(tmpmap.name, self.map.fp) if self.parameters.params['model_type'] == 'basic': if not isinstance(self.parameters.params['basic_model'], list): self.parameters.params['basic_model'] = [self.parameters.params['basic_model']] self.parameters.params['basic_model'].extend(model_to_add) else: - if not isinstance(self.parameters.params['basic_model'], list): + if not isinstance(self.parameters.params['advanced_model'], list): self.parameters.params['advanced_model'] = [self.parameters.params['advanced_model']] self.parameters.params['advanced_model'].extend(model_to_add) + if not isinstance(self.parameters.params['advanced_stats'], list): + self.parameters.params['advanced_stats'] = [self.parameters.params['advanced_stats']] + self.parameters.params['advanced_stats'].extend(['anova']) else: Warning(msg='In '+method+': output file not found ('+output_fp+'.txt'+').') if len([e for e in l['messages'] if 'error' in e.keys()]) > 0: l['status'] = 'failed' log['execution_time'] = str(datetime.now() - start_time) log['status'] = 'failed' self.log.update() Error(msg='See'+self.log.fp+'.', to_rm=self.to_rm) else: l['status'] = 'completed' self.log.update() log['execution_time'] = str(datetime.now() - start_time) log['status'] = 'completed' self.log.update() diff --git a/lib/genocrunch_console/lib/genocrunchlib.pyc b/lib/genocrunch_console/lib/genocrunchlib.pyc index a38c3c0..06ea672 100644 Binary files a/lib/genocrunch_console/lib/genocrunchlib.pyc and b/lib/genocrunch_console/lib/genocrunchlib.pyc differ