analyse_dataset.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Thu, Apr 25, 12:15

analyse_dataset.py
View Options

	# Modified from Jasmin's code: littlepythons/Halflife_modeling/Data_preparation/analyse_expanded_dataset.py
	# Analyse the expanded dataset, which holds the experimental conditions for each half-life

	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import numpy as np
	import sys
	sys.path.insert(0, sys.path[0] + '/../')
	# from utils import *
	# from Bayesian import *
	import scipy
	import statsmodels.api as sm
	import statsmodels.formula.api as smf
	import re

	# Project root folder and the dataset location relative to it; the two
	# pieces are glued together into the full input path used by the script.
	file_location = "C:\\Users\\leetseng\\TWtest"
	file_name = '\\input\\sludgeDatasetMerge.tsv'
	input_file_path_ = "".join((file_location, file_name))
	# output_file_path = file_location + '\\output\\sludgeDatasetFromRichDescribe.tsv'


	def main():
	    """Load the merged sludge dataset and plot the distribution of its columns.

	    Reads the tab-separated file at the module-level ``input_file_path_``
	    and hands the resulting DataFrame to ``print_all_distributions``.
	    """
	    # The module defines ``input_file_path_`` (trailing underscore); the
	    # previous reference to ``input_file_path`` raised NameError.
	    # The input is a .tsv file, so the separator must be a tab.
	    # NOTE(review): the original comment warns that the TSV has header rows
	    # repeated where new datasets were appended, and suggests importing the
	    # Excel version instead -- confirm the TSV is clean before relying on it.
	    df = pd.read_csv(input_file_path_, sep='\t')
	    print_all_distributions(df)


	def print_all_distributions(df):
	    """Save a 2x5 grid of violin plots, one per dataset column, to a PDF.

	    Args:
	        df: DataFrame that contains every column named in ``columns`` below.

	    The figure is written under the module-level ``file_location`` folder
	    and closed afterwards; nothing is returned.
	    """
	    # Plotted in row-major order: the first five fill the top row of the grid.
	    columns = (
	        'halflife',
	        'rateconstant',
	        'acidity',
	        'temperature',
	        'original_sludge_amount',
	        'sludge_retention_time',
	        'nitrogen_content_influent',
	        'oxygen_demand_value',
	        'phosphorus_content',
	        'bioreactor_value',
	    )
	    # The style has to be set before the axes exist: seaborn styles only
	    # apply to axes created afterwards, so setting it after plt.subplots()
	    # left the grid unstyled.
	    sns.set_style("whitegrid")
	    figure, axes = plt.subplots(2, 5, figsize=(20, 10))  # rows, columns
	    # axes.flat walks the 2x5 array row by row, matching the order above.
	    for column, axis in zip(columns, axes.flat):
	        sns.violinplot(data=df, y=column, inner='stick', ax=axis)
	    figure.tight_layout()
	    # Save and close this specific figure rather than the implicit "current" one.
	    figure.savefig(file_location+'\\output\\figures\\distribution_sludge_dataset_MergeXXXXXXXXXXXXXX.pdf')
	    plt.close(figure)

	# Alternative version that loads the dataset from an Excel file (kept commented out for reference)
	# file_location = "C:\\Users\\leetseng\\TWtest"
	# file_name = '\\input\\sludgeDatasetMerge.tsv'
	# input_file_path_ = file_location+file_name
	# output_file_path = file_location + '\\output\\sludgeDatasetFromRichDescribe.tsv'
	# def main():
	# df = pd.read_excel(input_file_path)
	# #you need to import the excel since the tsv file append the new datasets with header.
	# print_all_distributions(df)
	#
	#
	# def print_all_distributions(df):
	# figure, axes = plt.subplots(2, 5, figsize=(20, 10)) # rows, columns
	# sns.set_style("whitegrid")
	# sns.violinplot(data=df, y='halflife', inner='stick', ax=axes[0, 0])
	# sns.violinplot(data=df, y='rateconstant', inner='stick', ax=axes[0, 1])
	# sns.violinplot(data=df, y='acidity', inner='stick', ax=axes[0, 2])
	# sns.violinplot(data=df, y='temperature', inner='stick', ax=axes[0, 3])
	# sns.violinplot(data=df, y='original_sludge_amount', inner='stick', ax=axes[0, 4])
	# sns.violinplot(data=df, y='sludge_retention_time', inner='stick', ax=axes[1, 0])
	# sns.violinplot(data=df, y='nitrogen_content_influent', inner='stick', ax=axes[1, 1])
	# sns.violinplot(data=df, y='oxygen_demand_value', inner='stick', ax=axes[1, 2])
	# sns.violinplot(data=df, y='phosphorus_content', inner='stick', ax=axes[1, 3])
	# sns.violinplot(data=df, y='bioreactor_value', inner='stick', ax=axes[1, 4])
	# figure.tight_layout()
	# plt.savefig(file_location+'\\output\\figures\\distribution_sludge_dataset_MergeXXXXXXXXXXXXXX.pdf')
	# plt.close()

	# Script entry point: called unconditionally at module level, so the
	# analysis runs whenever this file is executed or imported.
	main()

analyse_dataset.pyNo OneTemporaryActions

File Metadata

analyse_dataset.pyView Options

Event Timeline

analyse_dataset.py
No OneTemporary
Actions

analyse_dataset.py
View Options