diff --git a/MetaCycPWanalysis/PathwaysReconstructionNative-sensitivity_analysis 20200728 11.19.tsv b/MetaCycPWanalysis/data/alternativeReconstruction.tsv similarity index 100% rename from MetaCycPWanalysis/PathwaysReconstructionNative-sensitivity_analysis 20200728 11.19.tsv rename to MetaCycPWanalysis/data/alternativeReconstruction.tsv diff --git a/MetaCycPWanalysis/PathwaysEdgesPresents-sensitivity analysis 20200729 13.08.tsv b/MetaCycPWanalysis/data/beyondTop100.tsv similarity index 100% rename from MetaCycPWanalysis/PathwaysEdgesPresents-sensitivity analysis 20200729 13.08.tsv rename to MetaCycPWanalysis/data/beyondTop100.tsv diff --git a/MetaCycPWanalysis/PathwaysReconstructionNative-sensitivity_analysis 20200728 10.51.tsv b/MetaCycPWanalysis/data/withinTop100.tsv similarity index 100% rename from MetaCycPWanalysis/PathwaysReconstructionNative-sensitivity_analysis 20200728 10.51.tsv rename to MetaCycPWanalysis/data/withinTop100.tsv diff --git a/MetaCycPWanalysis/plot.py b/MetaCycPWanalysis/plot.py index 024ed1e..d7fad03 100644 --- a/MetaCycPWanalysis/plot.py +++ b/MetaCycPWanalysis/plot.py @@ -1,136 +1,137 @@ __author__ = 'anastasia' import pandas as pd import matplotlib.pyplot as plt import os import numpy as np -sensitivity_analysis_file_exact = 'PathwaysReconstructionNative-sensitivity_analysis 20200728 10.51.tsv' -sensitivity_analysis_file_any = 'PathwaysReconstructionNative-sensitivity_analysis 20200728 11.19.tsv' -sensitivity_analysis_file_edges = 'PathwaysEdgesPresents-sensitivity analysis 20200729 13.08.tsv' +sensitivity_analysis_file_exact = 'data/withinTop100.tsv' +sensitivity_analysis_file_any = 'data/alternativeReconstruction.tsv' +sensitivity_analysis_file_edges = 'data/beyondTop100.tsv' pink = '#ffffff' green = '#1c74ac' blue = '#5a9957' violet = '#efa6b4' +# creating the directory for plots +if not os.path.exists('plots'): + os.mkdir('plots') + def hypotheticalVSmaAll(): fig, ax = plt.subplots() values_MA = ['chemATLAS_dist_known'] values_HYP = ['chemATLAS_hp_dist_known'] bar1, _ = plotNumberOfReconstructedPerNTDT(sensitivity_analysis_file_any, values_MA, values_HYP, ax, [blue, blue]) bar2, _ = plotNumberOfReconstructedPerNTDT(sensitivity_analysis_file_edges, ['chemATLAS_edges'], ['chemATLAS_hp_edges'], ax, [violet, violet]) bar3, _ = plotNumberOfReconstructedPerNTDT(sensitivity_analysis_file_exact, values_MA, values_HYP, ax, [green, green]) labels = ['Only BNICE.ch-\ncurated reactions', 'All ATLASx'] ax.set_ylabel('Number of pathways') ax.axes.xaxis.set_visible(False) ax.text(0.2, -160, labels[0], ha='center', va='bottom') ax.text(0.02, -160, labels[1], ha='center', va='bottom') #ax.legend(loc="lower right") fig.legend((bar1, bar2, bar3), ('Alternative reconstruction', 'Exact reconstruction beyond top-100', 'Exact reconstruction within top-100')) fig.tight_layout() plt.savefig('plots/hypotheticalVSmaAll.png', bbox = [6, 9]) def plotNumberOfReconstructedPerNTDT(sensitivity_analysis_file, values_MA, values_HYP, ax, colors): df = pd.read_csv( sensitivity_analysis_file, sep='\t') df = df.drop_duplicates() - print(df.columns) counts_MA = [df[i].to_list().count(1) for i in values_MA] counts_HYP = [df[i].to_list().count(1) for i in values_HYP] width = 0.1 # the width of the bars rects1 = ax.bar(0.2, counts_MA, width, color = colors[0]) rects2 = ax.bar(0.02, counts_HYP, width, color = colors[1]) # Add some text for labels, title and custom x-axis tick labels, etc. ax.set_ylabel('Number of reconstruted pathways') autolabel(rects1, ax) autolabel(rects2, ax) #fig.tight_layout() return rects1, rects2 def autolabel(rects, ax): #Attach a text label above each bar in *rects*, displaying its height. for rect in rects: height = rect.get_height() ax.annotate('{}'.format(height), xy=(rect.get_x() + rect.get_width() / 2, height-60), xytext=(0, 3), # 3 points vertical offset textcoords="offset points", ha='center', va='bottom', fontsize=7) hypotheticalVSmaAll() ######## Supplementary material plots ######### def plotLengthDistributionAll(): fig, ax = plt.subplots(figsize=(16,9)) search_setups = ['chemATLAS_dist_known', 'chemATLAS_hp_dist_known'] bar2 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_any, search_setups, ax, blue) bar4 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_edges, ['chemATLAS_edges','chemATLAS_hp_edges'], ax, violet) #['chemATLAS_edges','chemATLAS_hp_edges'] bar3 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_exact, search_setups, ax, green) ax.set_ylabel('Number of pathways', fontsize=20) ax.set_xlabel('Pathway length', fontsize=20) plt.xticks(np.arange(0, 30, 5), fontsize = 18) plt.yticks(np.arange(0, 450, 25), fontsize = 18) ax.legend((bar2, bar4, bar3), ('Alternative reconstruction', 'Exact reconstruction beyond top 100', 'Exact reconstruction within top 100'), fontsize=20) plt.xlim(0, 27) plt.ylim(0, 450) plt.savefig('plots/plotGoldenDatasetPathwayLengthDistributionAll_all.png', bbox = [9, 9]) def plotLengthDistributionAllCrop(): fig, ax = plt.subplots(figsize=(9,3)) #bar1 = plotGoldenDatasetPathwayLengthDistribution(ax, pink) search_setups = ['chemATLAS_dist_known', 'chemATLAS_hp_dist_known'] bar2 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_any, search_setups, ax, blue) bar4 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_edges, ['chemATLAS_edges','chemATLAS_hp_edges'], ax, violet) #['chemATLAS_edges','chemATLAS_hp_edges'] bar3 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_exact, search_setups, ax, green) fig.tight_layout() plt.xticks(np.arange(10, 30, 5), fontsize = 21) plt.yticks(np.arange(5, 11, 5), fontsize = 21) plt.xlim(9.5, 27) plt.ylim(0, 12) plt.savefig('plots/plotGoldenDatasetPathwayLengthDistributionAll_crop.png', bbox = [3, 9]) def plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file, search_setups, ax, color): df = pd.read_csv( sensitivity_analysis_file, sep='\t') - print(df.columns) df = df.drop_duplicates() setup1 = search_setups[0] setup2 = search_setups[1] values1 = list(set(df['Length'].to_list())) x = np.arange(2, len(values1)) # the label locations width = 0.4 # the width of the bars counts1 = [df[df[setup1]==1]['Length'].to_list().count(i) for i in values1] values1 = [i-1+ width / 2 for i in values1] rects1 = ax.bar(values1, counts1, width, label=setup1, color=color) values2 = list(set(df['Length'].to_list())) counts2 = [df[df[setup2]==1]['Length'].to_list().count(i) for i in values2] values2 = [i-1 - width / 2 for i in values2] - #x = np.arange(2, len(values2)) rects2 = ax.bar(values2, counts2, width, label=setup2, color=color) return rects1, rects2 -#plotLengthDistributionAll() -#plotLengthDistributionAllCrop() \ No newline at end of file +plotLengthDistributionAll() +plotLengthDistributionAllCrop() \ No newline at end of file