diff --git a/calculate_target_variables.py b/calculate_target_variables.py
index 30ebe11..1ae043f 100644
--- a/calculate_target_variables.py
+++ b/calculate_target_variables.py
@@ -1,270 +1,270 @@
 import pandas as pd
 import numpy as np
 import seaborn as sns
 import matplotlib.pyplot as plt
 from scipy.stats import gmean
 from Bayesian import *

 output_file_path = "C:\\Users\\leetseng\\TWtest\\output\\"
 input_file_path = "C:\\Users\\leetseng\\TWtest\\input\\"

 df_raw = pd.read_csv(input_file_path + "sludge_raw_use_this_for_baymean_test.tsv", sep='\t')  # sludgeDatasetMergeCalculated.tsv  # sludgeDatasetMergeCalculatedAvglogHL.tsv

 CURATE_DATA_POINTS = False


 def main():
     df = calculate_target_variable(df_raw)
     df1 = describe_dropna_halflife(df)
     df_ = calculate_bay_mean_std(df1)
-    df_.to_csv(output_file_path+'sludge_bayesian_PriorMuStd_2.tsv', sep='\t')
-    plot_distribution(output_path=output_file_path + 'Distribution_comparison_PriorMuStd_09.pdf')
+    df_.to_csv(output_file_path+'sludge_bay_PriorMuStd_2.tsv', sep='\t')
+    plot_distribution(output_path=output_file_path + 'Distribution_comparison_PriorMuStd_2.pdf')


 def calculate_target_variable(df):
     df['hl_log_gmean'] = np.log10(get_geometric_mean(df, 'halflife'))
     df['hl_log_median'] = np.log10(get_median(df, 'halflife'))
     df['hl_log_std'] = np.log10(get_std(df, 'halflife'))
     df['hl_log_spread'] = get_hl_spread(df)
     df['biomass_hl_log_gmean'] = np.log10(get_geometric_mean(df, 'hl_biomass_corrected'))
     df['biomass_hl_log_median'] = np.log10(get_median(df, 'hl_biomass_corrected'))
     df['biomass_hl_log_std'] = np.log10(get_std(df, 'hl_biomass_corrected'))
     df['biomass_hl_log_spread'] = get_biomass_hl_spread(df)
     df['acidity_std'] = get_std(df, 'acidity')
     df['temperature_std'] = get_std(df, 'temperature')
     df['biomass_log_std'] = get_std(df, 'total_suspended_solids_concentration_start')
     return df


 def describe_dropna_halflife(df):
     df.dropna(subset=['halflife', 'halflife_log'], inplace=True)  # Remove rows with NaN half-life; otherwise the bmean/bstd calculation raises a ValueError.
     # df.to_csv(output_file_path + 'sludge_calculated_test_for_baycalculation.tsv', sep='\t')  # 'sludge_calculated.tsv'
     df_ = df.copy()
     description = df_.describe()
     print("Summary of loaded data:\n------------------\n", description)
     description.to_csv(output_file_path + 'sludge_calculated_test_for_baycalculation_describe.tsv', sep='\t')
     return df


 def calculate_bay_mean_std(df):
     bmean, bstd = get_bayesian_stats(df)
-    df['hl_log_bayesian_mean'] = bmean
-    df['hl_log_bayesian_std'] = bstd
+    df['hl_bayesian_mean'] = bmean
+    df['hl_bayesian_std'] = bstd
     # df.to_csv(output_file_path+'sludge_calculated_test_for_baycalculation.tsv', sep='\t')
     return df


 def get_std(df, column):
     new = []
     for index, row in df.iterrows():
         this = df.loc[df['reduced_smiles'] == row['reduced_smiles']]
         std = np.nanstd(this[column])
         if std == 0:
             new.append(np.NaN)
         else:
             new.append(std)
     return new


 def get_mean(df, column):
     new = []
     for index, row in df.iterrows():
         this = df.loc[df['reduced_smiles'] == row['reduced_smiles']]
         std = np.nanmean(this[column])
         new.append(std)
     return new


 def get_median(df, column):
     new = []
     for index, row in df.iterrows():
         this = df.loc[df['reduced_smiles'] == row['reduced_smiles']]
         std = np.nanmedian(this[column])
         new.append(std)
     return new


 def get_hl_spread(df):
     new = []
     for index, row in df.iterrows():
         this = df.loc[df['reduced_smiles'] == row['reduced_smiles']]
         spread = max(this['halflife_log']) - min(this['halflife_log'])
         new.append(spread)
     return new


 def get_biomass_hl_spread(df):
     new = []
     for index, row in df.iterrows():
         this = df.loc[df['reduced_smiles'] == row['reduced_smiles']]
         spread = max(np.log10(this['hl_biomass_corrected'])) - min(np.log10(this['hl_biomass_corrected']))
         new.append(spread)
     return new


 def legal(value, name):
     if np.isnan(value):
         print(f"Problem: no {name}")
         return False
     elif value == 0:
         print(f"Problem: {name} is 0")
         return False
     return True


 def g_mean(x):
     a = np.log(x)
     return np.exp(a.mean())


 def get_geometric_mean(df, column):
     new = []
     for index, row in df.iterrows():
         this = df.loc[df['reduced_smiles'] == row['reduced_smiles']]
         gmean = g_mean(this[column])
         new.append(gmean)
     return new


 # def get_gmean(df, column):
 #     new = []
 #     for index, row in df.iterrows():
 #         this = df.loc[df['reduced_smiles'] == row['reduced_smiles']]
 #         v = this[column]
 #         if not legal(v, f"{column}"):
 #             return np.NaN
 #
 #         if legal(v, f"{column}"):
 #             std = gmean(v)
 #             new.append(std)
 #     return new


 def process_comment_list(comment_list):
     new_list = []
     for comment in comment_list:
         if type(comment) == float:
             new_list.append('')
         elif '<' in comment:
             new_list.append('<')
         elif '>' in comment:
             new_list.append('>')
         else:
             new_list.append('')
     return new_list


 def get_bayesian_stats(df):
     mean_list = []
     std_list = []
     results = {}  # {'index': (mean, std)}
     for index, row in df.iterrows():
         if row['reduced_smiles'] in results.keys():
             mean, std = results[row['reduced_smiles']]
         else:
             this = df.loc[df['reduced_smiles'] == row['reduced_smiles']]
             comment_list_raw = process_comment_list(this["halflife_comment"])
             y_raw = np.array(this['halflife_log'])
             if CURATE_DATA_POINTS == True:
                 pass
             else:
                 y = y_raw
                 comment_list = comment_list_raw
             print("\nCOMPOUND reduced_smiles {}".format(row['reduced_smiles']))
             print("Compute bayes for {} with comments {}".format(y, comment_list))
             bayesian = Bayesian(y=y, comment_list=comment_list)
             bayesian.set_prior_mu(mean=0.2, std=2)  # (Original: mean=1.5, std=2) Set prior_mu_std to 2
             bayesian.set_prior_sigma(mean=0.4, std=0.9)  # (Original: mean=0.2, std=0.5)
             mean, std = bayesian.get_posterior_distribution()
             results[row['reduced_smiles']] = (mean, std)
             print('mean: {}, std: {}'.format(mean, std))
             bayesian.plot_distribution(output_path=output_file_path + 'Distribution_comparison_PriorMuStd_2.pdf')  # Added to plot the data distribution.
         mean_list.append(round(mean, 2))
         std_list.append(round(std, 2))
     return mean_list, std_list


 # column_groupby_smiles_sludge(df_raw, 'log_hl_biomass_corrected')

 # def rename_column(df):
 #     df_rename = df.rename(columns={
 #         f'mean_log_hl_combined': 'hl_log_mean',
 #         f'median_log_hl_combined': 'hl_log_median',
 #         f'geomean_log_hl_combined': 'hl_log_gmean',
 #         f'std_log_hl_combined': 'hl_log_std',
 #         f'mean_log_hl_biomass_corrected': 'biomass_hl_log_mean',
 #         f'median_log_hl_biomass_corrected': 'biomass_hl_log_median',
 #         f'geomean_log_hl_biomass_corrected': 'biomass_hl_log_gmean',
 #         f'std_log_hl_biomass_corrected': 'biomass_hl_log_std'
 #     })
 #     return df_rename


 def column_groupby_smiles_sludge(df, column):
     mean_column_list = []
     median_column_list = []
     geomean_column_list = []
     std_column_list = []
     for index, row in df.iterrows():
         # if not legal(row[column], f"row[{column}]"):
         #     return np.NaN
         #
         # elif legal(row[column], f"row[{column}]"):
-        x = df.loc[df['smiles'] == row['smiles']]
+        x = df.loc[df['reduced_smiles'] == row['reduced_smiles']]
         mean_ = np.nanmean(x[column])
         median_ = np.nanmedian(x[column])
         g_ = gmean(x['hl_biomass_corrected'])
         geomean_ = np.log10(g_)
         std_ = np.nanstd(x[column])
         mean_column_list.append(mean_)
         median_column_list.append(median_)
         geomean_column_list.append(geomean_)
         std_column_list.append(std_)
     df[f'mean_{column}'] = mean_column_list
     df[f'median_{column}'] = median_column_list
     df[f'geomean_{column}'] = geomean_column_list
     df[f'std_{column}'] = std_column_list
     df.rename(columns={
         'mean_log_hl_combined': 'hl_log_mean',
         'median_log_hl_combined': 'hl_log_median',
         'geomean_log_hl_combined': 'hl_log_gmean',
         'std_log_hl_combined': 'hl_log_std',
         'mean_log_hl_biomass_corrected': 'biomass_hl_log_mean',
         'median_log_hl_biomass_corrected': 'biomass_hl_log_median',
         'geomean_log_hl_biomass_corrected': 'biomass_hl_log_gmean',
         'std_log_hl_biomass_corrected': 'biomass_hl_log_std'
     }, inplace=True)
     df_new = df.to_csv(output_file_path + "sludgeDatasetMergeCalculatedAvglogHLBiomass.tsv", sep='\t')
     return df_new


 if __name__ == '__main__':
     main()


 """
 The following code is my initial script for fetching the avg values of logDT50 and logDT50'

 def column_groupby_smiles_sludge(df, column):
     mean_column_list = []
     median_column_list = []
     geomean_column_list = []
     std_column_list = []
     for index, row in df.iterrows():
         # if not legal(row[column], f"row[{column}]"):
         #     return np.NaN
         #
         # elif legal(row[column], f"row[{column}]"):
         x = df.loc[df['smiles'] == row['smiles']]
         mean_ = np.nanmean(x[column])
         median_ = np.nanmedian(x[column])
         g_ = gmean(x['hl_biomass_corrected'])
         geomean_ = np.log10(g_)
         std_ = np.nanstd(x[column])
         mean_column_list.append(mean_)
         median_column_list.append(median_)
         geomean_column_list.append(geomean_)
         std_column_list.append(std_)
     df[f'mean_{column}'] = mean_column_list
     df[f'median_{column}'] = median_column_list
     df[f'geomean_{column}'] = geomean_column_list
     df[f'std_{column}'] = std_column_list
     df.rename(columns={
         'mean_log_hl_combined': 'hl_log_mean',
         'median_log_hl_combined': 'hl_log_median',
         'geomean_log_hl_combined': 'hl_log_gmean',
         'std_log_hl_combined': 'hl_log_std',
         'mean_log_hl_biomass_corrected': 'biomass_hl_log_mean',
         'median_log_hl_biomass_corrected': 'biomass_hl_log_median',
         'geomean_log_hl_biomass_corrected': 'biomass_hl_log_gmean',
         'std_log_hl_biomass_corrected': 'biomass_hl_log_std'
     }, inplace=True)
     df_new = df.to_csv(output_file_path + "sludgeDatasetMergeCalculatedAvglogHLBiomass.tsv", sep='\t')
     return df_new
 """
\ No newline at end of file
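
Reviewer note, outside the patch itself: the per-compound aggregates that `get_std`, `get_median`, `get_geometric_mean`, and `get_hl_spread` build row by row with `iterrows()` can also be expressed with `groupby(...).transform(...)`, which broadcasts each group statistic back onto every row just as the loops do. Below is a minimal sketch, assuming the same `reduced_smiles`, `halflife`, and `halflife_log` columns; the helper name `add_grouped_halflife_stats` is hypothetical and not part of the script.

```python
import numpy as np
import pandas as pd


def add_grouped_halflife_stats(df: pd.DataFrame) -> pd.DataFrame:
    # One group per compound; transform() returns a value aligned to every row,
    # mirroring what the iterrows() loops in the script produce.
    hl = df.groupby('reduced_smiles')['halflife']
    hl_log = df.groupby('reduced_smiles')['halflife_log']

    # log10 of the geometric mean equals the mean of the log10 values.
    df['hl_log_gmean'] = hl.transform(lambda x: np.log10(x).mean())
    df['hl_log_median'] = np.log10(hl.transform('median'))

    # ddof=0 matches np.nanstd; the script maps a zero std to NaN before taking the log.
    std = hl.transform(lambda x: x.std(ddof=0)).replace(0, np.nan)
    df['hl_log_std'] = np.log10(std)

    # Within-compound spread of the log half-lives.
    df['hl_log_spread'] = hl_log.transform('max') - hl_log.transform('min')
    return df
```

The `transform` form keeps the per-row alignment the rest of the script expects while avoiding the repeated `df.loc[...]` scans inside the loop.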