Page MenuHomec4science

analyse_dataset.py
No OneTemporary

File Metadata

Created
Thu, Apr 25, 12:15

analyse_dataset.py

# Modified from Jasmin's code: littlepythons/Halflife_modeling/Data_preparation/analyse_expanded_dataset.py
# Analyzes the expanded dataset with the experimental conditions for each half-life.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.insert(0, sys.path[0] + '/../')
# from utils import *
# from Bayesian import *
import scipy
import statsmodels.api as sm
import statsmodels.formula.api as smf
import re
# Root folder of the local analysis workspace (Windows-style path).
file_location = "C:\\Users\\leetseng\\TWtest"
# Location of the merged sludge dataset, relative to the workspace root.
file_name = '\\input\\sludgeDatasetMerge.tsv'
# Full path of the input dataset: workspace root followed by the relative name.
input_file_path_ = f"{file_location}{file_name}"
# output_file_path = file_location + '\\output\\sludgeDatasetFromRichDescribe.tsv'
def main():
    """Load the merged sludge dataset and plot the distribution of its variables.

    Reads the tab-separated file at the module-level path ``input_file_path_``
    into a DataFrame and hands it to ``print_all_distributions``.
    """
    # The module-level constant is spelled ``input_file_path_`` (trailing
    # underscore); referring to ``input_file_path`` raised a NameError.
    # The input is a .tsv file, so the tab separator is passed explicitly:
    # pandas.read_csv splits on commas by default.
    df = pd.read_csv(input_file_path_, sep='\t')
    # NOTE: new datasets were appended to the TSV together with their header
    # row, so the Excel export may have to be imported instead.
    print_all_distributions(df)
def print_all_distributions(df):
    """Save a 2x5 grid of violin plots, one per dataset variable, as a PDF.

    Args:
        df: DataFrame that contains every column listed in ``columns`` below.

    The figure is written to the figures folder underneath the module-level
    ``file_location`` directory; nothing is returned.
    """
    # Plotted in row-major order: the first five fill the top row of the grid,
    # the last five fill the bottom row.
    columns = (
        'halflife',
        'rateconstant',
        'acidity',
        'temperature',
        'original_sludge_amount',
        'sludge_retention_time',
        'nitrogen_content_influent',
        'oxygen_demand_value',
        'phosphorus_content',
        'bioreactor_value',
    )
    # The style must be set BEFORE the axes are created: seaborn styles only
    # affect axes made afterwards, so setting it after plt.subplots did nothing
    # for this figure.
    sns.set_style("whitegrid")
    figure, axes = plt.subplots(2, 5, figsize=(20, 10))  # rows, columns
    # axes.flat walks the 2x5 grid row by row, matching the order of ``columns``.
    for ax, column in zip(axes.flat, columns):
        sns.violinplot(data=df, y=column, inner='stick', ax=ax)
    figure.tight_layout()
    # Save and close this specific figure rather than the implicit "current" one.
    figure.savefig(file_location+'\\output\\figures\\distribution_sludge_dataset_MergeXXXXXXXXXXXXXX.pdf')
    plt.close(figure)
# Alternative version that opens the file with Excel (pd.read_excel); kept commented out for reference:
# file_location = "C:\\Users\\leetseng\\TWtest"
# file_name = '\\input\\sludgeDatasetMerge.tsv'
# input_file_path_ = file_location+file_name
# output_file_path = file_location + '\\output\\sludgeDatasetFromRichDescribe.tsv'
# def main():
# df = pd.read_excel(input_file_path)
# #you need to import the excel since the tsv file append the new datasets with header.
# print_all_distributions(df)
#
#
# def print_all_distributions(df):
# figure, axes = plt.subplots(2, 5, figsize=(20, 10)) # rows, columns
# sns.set_style("whitegrid")
# sns.violinplot(data=df, y='halflife', inner='stick', ax=axes[0, 0])
# sns.violinplot(data=df, y='rateconstant', inner='stick', ax=axes[0, 1])
# sns.violinplot(data=df, y='acidity', inner='stick', ax=axes[0, 2])
# sns.violinplot(data=df, y='temperature', inner='stick', ax=axes[0, 3])
# sns.violinplot(data=df, y='original_sludge_amount', inner='stick', ax=axes[0, 4])
# sns.violinplot(data=df, y='sludge_retention_time', inner='stick', ax=axes[1, 0])
# sns.violinplot(data=df, y='nitrogen_content_influent', inner='stick', ax=axes[1, 1])
# sns.violinplot(data=df, y='oxygen_demand_value', inner='stick', ax=axes[1, 2])
# sns.violinplot(data=df, y='phosphorus_content', inner='stick', ax=axes[1, 3])
# sns.violinplot(data=df, y='bioreactor_value', inner='stick', ax=axes[1, 4])
# figure.tight_layout()
# plt.savefig(file_location+'\\output\\figures\\distribution_sludge_dataset_MergeXXXXXXXXXXXXXX.pdf')
# plt.close()
# Run the analysis only when this file is executed as a script, so that
# importing the module does not trigger file reads and figure generation.
if __name__ == "__main__":
    main()

Event Timeline