dataanalysis.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Thu, May 16, 18:31

dataanalysis.py
View Options

	import numpy as np

	from ipywidgets import interact, interactive, fixed, interact_manual
	from ipywidgets import HBox, VBox, Label, Layout
	import ipywidgets as widgets

	from IPython.display import IFrame
	from IPython.display import set_matplotlib_formats, display, Math, Markdown, Latex, HTML
	# NOTE: IPython documents set_matplotlib_formats as deprecated since 7.23 in
	# favour of matplotlib_inline.backend_inline.set_matplotlib_formats; it is
	# kept here so the module does not gain a new dependency.
	set_matplotlib_formats('svg')

	# Enable the inline backend for matplotlib (this module is meant to be
	# imported from an IPython/Jupyter session, where get_ipython() is available)
	from IPython import get_ipython
	get_ipython().run_line_magic('matplotlib', 'inline')

	import matplotlib.pyplot as plt
	import matplotlib.patches as pat
	import matplotlib.ticker as ticker

	# Global style for plotting.
	# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-<name>'
	# and 3.8 removed the old names, so prefer the new name when it is available
	# and fall back to the historical one on older installations.
	_WHITEGRID_STYLE = 'seaborn-v0_8-whitegrid'
	plt.style.use(_WHITEGRID_STYLE if _WHITEGRID_STYLE in plt.style.available else 'seaborn-whitegrid')

	import scipy.stats as stats

	def visualize_ttest(sample_size, alpha, t):
	    """Show a Student t distribution with its two-tailed rejection zone and the observed t.

	    The curve is the t distribution built with ``df=sample_size``. The two
	    tails of probability ``alpha/2`` are shaded in red, and a dashed vertical
	    line plus a boxed annotation mark the statistic ``t``. The figure is
	    displayed with ``plt.show()``; nothing is returned.

	    Parameters:
	        sample_size: value passed unchanged as the degrees of freedom of the
	            t distribution.
	        alpha: significance level, split evenly between the two tails.
	        t: value of the t statistic to mark on the x axis.

	    NOTE(review): ``sample_size`` is used directly as ``df``; confirm that
	    callers do not expect ``sample_size - 1`` degrees of freedom.
	    """
	    _, axes = plt.subplots(figsize=(12, 4))
	    axes.set_title("Probability distribution of all possible sample means if $H_0$ is true")

	    # Density curve, drawn between the 0.01% and 99.99% quantiles
	    dist = stats.t(df=sample_size, loc=0, scale=1)
	    left_edge = dist.ppf(0.0001)
	    right_edge = dist.ppf(0.9999)
	    grid = np.linspace(left_edge, right_edge, 100)
	    axes.plot(grid, dist.pdf(grid), color='black', linewidth=1)

	    # Strip the decorations: no y axis, no grid, only the bottom spine stays.
	    # set_ylim is called here, right after the curve is drawn, as in the
	    # original sequence of calls.
	    axes.get_yaxis().set_visible(False)
	    axes.set_ylim(bottom=0)
	    axes.grid(False)
	    for side in ('top', 'right', 'left'):
	        axes.spines[side].set_visible(False)

	    # Two-tailed rejection zone; only the first patch carries the legend label
	    lower_tail = np.linspace(left_edge, dist.ppf(alpha/2), 100)
	    upper_tail = np.linspace(dist.ppf(1-alpha/2), right_edge, 100)
	    axes.fill_between(lower_tail, dist.pdf(lower_tail), 0, alpha=0.3, color='red', label=r'rejection of $H_0$ with $\alpha={}$'.format(alpha))
	    axes.fill_between(upper_tail, dist.pdf(upper_tail), 0, alpha=0.3, color='red')

	    # Mark the observed t statistic
	    axes.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
	    label_box = {"boxstyle": "round", "facecolor": "white", "edgecolor": "firebrick", "alpha": 0.8}
	    axes.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130), textcoords='offset points', bbox=label_box)

	    axes.legend()

	    # Display the graph
	    plt.show()


	def visualize_ttest_pvalue(sample_size, alpha, t, p):
	    """Show a t distribution with rejection zone, observed t and a hatched p-value tail.

	    Draws the same picture as a plain t-test plot (t distribution built with
	    ``df=sample_size``, red two-tailed rejection zone for ``alpha``, dashed
	    line and annotation at ``t``) and additionally hatches the single tail
	    lying beyond ``t``: the right tail when ``t >= 0``, the left tail
	    otherwise. That hatched area is labelled with ``p`` in the legend. The
	    figure is displayed with ``plt.show()``; nothing is returned.

	    Parameters:
	        sample_size: value passed unchanged as the degrees of freedom of the
	            t distribution.
	        alpha: significance level, split evenly between the two tails.
	        t: value of the t statistic to mark on the x axis.
	        p: p-value, used only for the legend label (formatted with three
	            decimals).

	    NOTE(review): only one tail is hatched while the rejection zone is
	    two-tailed; confirm whether ``p`` is meant to be a one-sided or a
	    two-sided p-value.
	    """
	    _, axes = plt.subplots(figsize=(12, 4))
	    axes.set_title("Probability distribution of all possible sample means if $H_0$ is true")

	    # Density curve, drawn between the 0.01% and 99.99% quantiles
	    dist = stats.t(df=sample_size, loc=0, scale=1)
	    left_edge = dist.ppf(0.0001)
	    right_edge = dist.ppf(0.9999)
	    grid = np.linspace(left_edge, right_edge, 100)
	    axes.plot(grid, dist.pdf(grid), color='black', linewidth=1)

	    # Strip the decorations: no y axis, no grid, only the bottom spine stays.
	    # set_ylim is called here, right after the curve is drawn, as in the
	    # original sequence of calls.
	    axes.get_yaxis().set_visible(False)
	    axes.set_ylim(bottom=0)
	    axes.grid(False)
	    for side in ('top', 'right', 'left'):
	        axes.spines[side].set_visible(False)

	    # Two-tailed rejection zone; only the first patch carries the legend label
	    lower_tail = np.linspace(left_edge, dist.ppf(alpha/2), 100)
	    upper_tail = np.linspace(dist.ppf(1-alpha/2), right_edge, 100)
	    axes.fill_between(lower_tail, dist.pdf(lower_tail), 0, alpha=0.3, color='red', label=r'rejection of $H_0$ with $\alpha={}$'.format(alpha))
	    axes.fill_between(upper_tail, dist.pdf(upper_tail), 0, alpha=0.3, color='red')

	    # Mark the observed t statistic
	    axes.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
	    label_box = {"boxstyle": "round", "facecolor": "white", "edgecolor": "firebrick", "alpha": 0.8}
	    axes.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130), textcoords='offset points', bbox=label_box)

	    # Hatch the tail beyond t: towards the right edge for t >= 0, from the left edge otherwise
	    tail_from, tail_to = (t, right_edge) if t >= 0 else (left_edge, t)
	    p_tail = np.linspace(tail_from, tail_to, 100)
	    axes.fill_between(p_tail, dist.pdf(p_tail), 0, facecolor="none", edgecolor="firebrick", hatch="///", linewidth=0.0, label=r'p-value $p$={:.3f}'.format(p))

	    axes.legend()

	    # Display the graph
	    plt.show()



	###### TO DELETE

	def build_ttest_visualization(ttest_result, alpha):
	    """Show a two-panel figure: the t-test result on the left, the effect size on the right.

	    Left panel: t distribution with the rounded ``dof`` as degrees of freedom,
	    red two-tailed rejection zone for ``alpha``, dashed line and annotation at
	    the t statistic, and a hatched tail beyond t labelled with the p-value.

	    Right panel: a standard normal curve labelled 'Year 1 (theoretical)' and a
	    second unit-variance normal curve labelled 'Year 2 (theoretical)' whose
	    mean is shifted by Cohen's d (to the left when ``t > 0``, to the right
	    otherwise), with a red segment and annotation showing d.

	    The figure is displayed with ``plt.show()``; nothing is returned.

	    Parameters:
	        ttest_result: object indexed as ``ttest_result.loc["T-test", column]``
	            for the columns "dof", "T", "p-val" and "cohen-d" (presumably a
	            pandas DataFrame such as the one returned by a t-test helper;
	            confirm against the caller).
	        alpha: significance level, split evenly between the two tails.
	    """
	    def _stat(column):
	        # Read one value from the "T-test" row of the result table
	        return ttest_result.loc["T-test", column]

	    dof = round(_stat("dof"))
	    t_value = _stat("T")
	    p_value = _stat("p-val")
	    cohen_d = _stat("cohen-d")

	    plt.figure(figsize=(14, 4))

	    ### 1. t-test panel
	    left_ax = plt.subplot(121)
	    left_ax.set_title("Result of the t-test")

	    # Density curve, drawn between the 0.01% and 99.99% quantiles
	    dist = stats.t(df=dof, loc=0, scale=1)
	    left_edge = dist.ppf(0.0001)
	    right_edge = dist.ppf(0.9999)
	    grid = np.linspace(left_edge, right_edge, 100)
	    left_ax.plot(grid, dist.pdf(grid), color='black', linewidth=1)

	    # Strip the decorations: no y axis, no grid, only the bottom spine stays.
	    # set_ylim is called here, right after the curve is drawn, as in the
	    # original sequence of calls.
	    left_ax.get_yaxis().set_visible(False)
	    left_ax.set_ylim(bottom=0)
	    left_ax.grid(False)
	    for side in ('top', 'right', 'left'):
	        left_ax.spines[side].set_visible(False)

	    # Two-tailed rejection zone; only the first patch carries the legend label
	    lower_tail = np.linspace(left_edge, dist.ppf(alpha/2), 100)
	    upper_tail = np.linspace(dist.ppf(1-alpha/2), right_edge, 100)
	    left_ax.fill_between(lower_tail, dist.pdf(lower_tail), 0, alpha=0.3, color='red', label=r'threshold $\alpha={}$'.format(alpha))
	    left_ax.fill_between(upper_tail, dist.pdf(upper_tail), 0, alpha=0.3, color='red')

	    # Mark the observed t statistic
	    left_ax.axvline(x=t_value, color='firebrick', linestyle='dashed', linewidth=1)
	    t_box = {"boxstyle": "round", "facecolor": "white", "edgecolor": "firebrick", "alpha": 0.8}
	    left_ax.annotate('t-test $t$={:.3f}'.format(t_value), xy=(t_value, 0), xytext=(-10, 130), textcoords='offset points', bbox=t_box)

	    # Hatch the tail beyond t: towards the right edge for t >= 0, from the left edge otherwise
	    tail_from, tail_to = (t_value, right_edge) if t_value >= 0 else (left_edge, t_value)
	    p_tail = np.linspace(tail_from, tail_to, 100)
	    left_ax.fill_between(p_tail, dist.pdf(p_tail), 0, facecolor="none", edgecolor="firebrick", hatch="///", linewidth=0.0, label=r'p-value $p$={:.3f}'.format(p_value))

	    left_ax.legend(loc='upper right')

	    ### 2. Effect size panel
	    right_ax = plt.subplot(122)
	    right_ax.set_title("Effect size")

	    # Reference curve: standard normal centred on 0
	    reference = stats.norm(loc=0, scale=1)
	    ref_x = np.linspace(reference.ppf(0.0001), reference.ppf(0.9999), 100)
	    ref_y = reference.pdf(ref_x)
	    right_ax.plot(ref_x, ref_y, color='black', alpha=0.3, linewidth=1)
	    right_ax.fill_between(ref_x, ref_y, 0, color='blue', alpha=0.3, label='Year 1 (theoretical)')
	    right_ax.axvline(x=0, color='blue', alpha=0.5, linestyle='dashed', linewidth=1)

	    # Second curve, shifted by d: t > 0 means sample 2 < sample 1, so it goes on the left
	    if t_value > 0:
	        shift = -cohen_d
	    else:
	        shift = cohen_d
	    shifted = stats.norm(loc=shift, scale=1)
	    shifted_x = np.linspace(shifted.ppf(0.0001), shifted.ppf(0.9999), 100)
	    shifted_y = shifted.pdf(shifted_x)
	    right_ax.plot(shifted_x, shifted_y, color='black', alpha=0.3, linewidth=1)
	    right_ax.fill_between(shifted_x, shifted_y, 0, color='green', alpha=0.3, label='Year 2 (theoretical)')
	    right_ax.axvline(x=shift, color='green', alpha=0.5, linestyle='dashed', linewidth=1)

	    # Segment between the two means, drawn slightly above the peak of the reference curve
	    segment_height = np.max(ref_y)+.02
	    right_ax.plot([0, shift], [segment_height, segment_height], color='red', linewidth=1, marker=".")
	    d_box = {"boxstyle": "round", "facecolor": "white", "edgecolor": "red", "alpha": 0.8}
	    right_ax.annotate("effect size $d$={:.3f}".format(cohen_d), xy=(shift, segment_height), xytext=(15, -5), textcoords='offset points', bbox=d_box)

	    # Strip the decorations (done last on this panel, after everything is drawn)
	    right_ax.get_yaxis().set_visible(False)
	    right_ax.set_ylim(bottom=0)
	    right_ax.grid(False)
	    for side in ('top', 'right', 'left'):
	        right_ax.spines[side].set_visible(False)

	    right_ax.legend(loc='upper left')

	    # Display the graph
	    plt.subplots_adjust(wspace=.1)
	    plt.show()


	# EOF

dataanalysis.pyNo OneTemporaryActions

File Metadata

dataanalysis.pyView Options

Event Timeline

dataanalysis.py
No OneTemporary
Actions

dataanalysis.py
View Options