diff --git a/MetaCycPWanalysis/README.md b/MetaCycPWanalysis/README.md new file mode 100644 index 0000000..9a332c7 --- /dev/null +++ b/MetaCycPWanalysis/README.md @@ -0,0 +1,22 @@ +# ATLASx Analysis tools + +The data and scripts contained in this repository allow the user to reproduce the +Figure 3: "Pathway search comparison to dataset of pathways extracted from MetaCyc" +of the ATLASx manuscript from the original data files and the Supplementary Figure S3: +"Distribution of the length (i.e., number of reaction steps) of reconstructed MetaCyc pathways." + +## Usage + +### MetaCyc pathway coverage plot + +`$ cd NetworkAnalysis/Source` + +Plot number of pathways covered within top-100, beyond top-100 and covered with alternative routes + +`$ python3 plot.py + +Running this script will produce 3 plot files within the "plots" folder. +onlyBNICEvsAllATLASx.png - figure 3 of the manuscript +plotGoldenDatasetPathwayLengthDistributionAll_all.png - main part of the Supplementary Figure 3 +plotGoldenDatasetPathwayLengthDistributionAll_crop.png - cropped in part of the Supplementary Figure 3 + diff --git a/MetaCycPWanalysis/plot.py b/MetaCycPWanalysis/plot.py index d7fad03..f068cfb 100644 --- a/MetaCycPWanalysis/plot.py +++ b/MetaCycPWanalysis/plot.py @@ -1,137 +1,137 @@ __author__ = 'anastasia' import pandas as pd import matplotlib.pyplot as plt import os import numpy as np sensitivity_analysis_file_exact = 'data/withinTop100.tsv' sensitivity_analysis_file_any = 'data/alternativeReconstruction.tsv' sensitivity_analysis_file_edges = 'data/beyondTop100.tsv' pink = '#ffffff' green = '#1c74ac' blue = '#5a9957' violet = '#efa6b4' # creating the directory for plots if not os.path.exists('plots'): os.mkdir('plots') def hypotheticalVSmaAll(): fig, ax = plt.subplots() values_MA = ['chemATLAS_dist_known'] values_HYP = ['chemATLAS_hp_dist_known'] bar1, _ = plotNumberOfReconstructedPerNTDT(sensitivity_analysis_file_any, values_MA, values_HYP, ax, [blue, blue]) bar2, _ = plotNumberOfReconstructedPerNTDT(sensitivity_analysis_file_edges, ['chemATLAS_edges'], ['chemATLAS_hp_edges'], ax, [violet, violet]) bar3, _ = plotNumberOfReconstructedPerNTDT(sensitivity_analysis_file_exact, values_MA, values_HYP, ax, [green, green]) labels = ['Only BNICE.ch-\ncurated reactions', 'All ATLASx'] ax.set_ylabel('Number of pathways') ax.axes.xaxis.set_visible(False) ax.text(0.2, -160, labels[0], ha='center', va='bottom') ax.text(0.02, -160, labels[1], ha='center', va='bottom') #ax.legend(loc="lower right") fig.legend((bar1, bar2, bar3), ('Alternative reconstruction', 'Exact reconstruction beyond top-100', 'Exact reconstruction within top-100')) fig.tight_layout() - plt.savefig('plots/hypotheticalVSmaAll.png', bbox = [6, 9]) + plt.savefig('plots/onlyBNICEvsAllATLASx.png', bbox = [6, 9]) def plotNumberOfReconstructedPerNTDT(sensitivity_analysis_file, values_MA, values_HYP, ax, colors): df = pd.read_csv( sensitivity_analysis_file, sep='\t') df = df.drop_duplicates() counts_MA = [df[i].to_list().count(1) for i in values_MA] counts_HYP = [df[i].to_list().count(1) for i in values_HYP] width = 0.1 # the width of the bars rects1 = ax.bar(0.2, counts_MA, width, color = colors[0]) rects2 = ax.bar(0.02, counts_HYP, width, color = colors[1]) # Add some text for labels, title and custom x-axis tick labels, etc. ax.set_ylabel('Number of reconstruted pathways') autolabel(rects1, ax) autolabel(rects2, ax) #fig.tight_layout() return rects1, rects2 def autolabel(rects, ax): #Attach a text label above each bar in *rects*, displaying its height. for rect in rects: height = rect.get_height() ax.annotate('{}'.format(height), xy=(rect.get_x() + rect.get_width() / 2, height-60), xytext=(0, 3), # 3 points vertical offset textcoords="offset points", ha='center', va='bottom', fontsize=7) hypotheticalVSmaAll() ######## Supplementary material plots ######### def plotLengthDistributionAll(): fig, ax = plt.subplots(figsize=(16,9)) search_setups = ['chemATLAS_dist_known', 'chemATLAS_hp_dist_known'] bar2 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_any, search_setups, ax, blue) bar4 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_edges, ['chemATLAS_edges','chemATLAS_hp_edges'], ax, violet) #['chemATLAS_edges','chemATLAS_hp_edges'] bar3 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_exact, search_setups, ax, green) ax.set_ylabel('Number of pathways', fontsize=20) ax.set_xlabel('Pathway length', fontsize=20) plt.xticks(np.arange(0, 30, 5), fontsize = 18) plt.yticks(np.arange(0, 450, 25), fontsize = 18) ax.legend((bar2, bar4, bar3), ('Alternative reconstruction', 'Exact reconstruction beyond top 100', 'Exact reconstruction within top 100'), fontsize=20) plt.xlim(0, 27) plt.ylim(0, 450) plt.savefig('plots/plotGoldenDatasetPathwayLengthDistributionAll_all.png', bbox = [9, 9]) def plotLengthDistributionAllCrop(): fig, ax = plt.subplots(figsize=(9,3)) #bar1 = plotGoldenDatasetPathwayLengthDistribution(ax, pink) search_setups = ['chemATLAS_dist_known', 'chemATLAS_hp_dist_known'] bar2 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_any, search_setups, ax, blue) bar4 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_edges, ['chemATLAS_edges','chemATLAS_hp_edges'], ax, violet) #['chemATLAS_edges','chemATLAS_hp_edges'] bar3 = plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file_exact, search_setups, ax, green) fig.tight_layout() plt.xticks(np.arange(10, 30, 5), fontsize = 21) plt.yticks(np.arange(5, 11, 5), fontsize = 21) plt.xlim(9.5, 27) plt.ylim(0, 12) plt.savefig('plots/plotGoldenDatasetPathwayLengthDistributionAll_crop.png', bbox = [3, 9]) def plotLengthOfReconstructedPerNTDT(sensitivity_analysis_file, search_setups, ax, color): df = pd.read_csv( sensitivity_analysis_file, sep='\t') df = df.drop_duplicates() setup1 = search_setups[0] setup2 = search_setups[1] values1 = list(set(df['Length'].to_list())) x = np.arange(2, len(values1)) # the label locations width = 0.4 # the width of the bars counts1 = [df[df[setup1]==1]['Length'].to_list().count(i) for i in values1] values1 = [i-1+ width / 2 for i in values1] rects1 = ax.bar(values1, counts1, width, label=setup1, color=color) values2 = list(set(df['Length'].to_list())) counts2 = [df[df[setup2]==1]['Length'].to_list().count(i) for i in values2] values2 = [i-1 - width / 2 for i in values2] rects2 = ax.bar(values2, counts2, width, label=setup2, color=color) return rects1, rects2 plotLengthDistributionAll() plotLengthDistributionAllCrop() \ No newline at end of file