dataanalysis.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Thu, Jul 3, 10:42

dataanalysis.py
View Options

	import numpy as np

	from ipywidgets import interact, interactive, fixed, interact_manual
	from ipywidgets import HBox, VBox, Label, Layout
	import ipywidgets as widgets

	from IPython.display import IFrame
	from IPython.display import set_matplotlib_formats, display, Math, Markdown, Latex, HTML
	set_matplotlib_formats('svg')

	# Enable interactive backend for matplotlib
	from IPython import get_ipython
	get_ipython().run_line_magic('matplotlib', 'inline')

	import matplotlib.pyplot as plt
	import matplotlib.patches as pat
	import matplotlib.ticker as ticker
	plt.style.use('seaborn-whitegrid') # global style for plotting

	from matplotlib.ticker import MultipleLocator

	import scipy.stats as stats

	def visualize_ttest(sample_size, alpha, t):
	    """Show where an observed t statistic falls under the null distribution.

	    Draws the Student t density for a one-sample test on ``sample_size``
	    observations, shades both two-tailed rejection zones for ``alpha`` and
	    marks the observed statistic ``t`` with a dashed vertical line.

	    Parameters:
	        sample_size: number of observations in the sample.
	        alpha: significance level; each tail gets ``alpha / 2``.
	        t: observed t statistic.

	    The figure is displayed with ``plt.show()``; nothing is returned.
	    """
	    fig, ax = plt.subplots(figsize=(12, 4))
	    ax.set_title("Probability distribution of all possible sample means if $H_0$ is true")

	    # A one-sample t-test on n observations has n - 1 degrees of freedom
	    # (this now matches visualize_ttest_pvalue, which already used n - 1).
	    tdist = stats.t(df=sample_size - 1, loc=0, scale=1)
	    x_min = tdist.ppf(0.0001)
	    x_max = tdist.ppf(0.9999)
	    x = np.linspace(x_min, x_max, 100)
	    ax.plot(x, tdist.pdf(x), color='black', linewidth=1)

	    # Polish the look of the graph: keep only the bottom axis line.
	    ax.get_yaxis().set_visible(False)
	    ax.set_ylim(bottom=0)
	    ax.grid(False)
	    for side in ('top', 'right', 'left'):
	        ax.spines[side].set_visible(False)

	    # Two-tailed rejection zones; only the first one carries the legend label.
	    x_zone_1 = np.linspace(x_min, tdist.ppf(alpha / 2), 100)
	    x_zone_2 = np.linspace(tdist.ppf(1 - alpha / 2), x_max, 100)
	    ax.fill_between(x_zone_1, tdist.pdf(x_zone_1), 0, alpha=0.3, color='red',
	                    label=r'rejection of $H_0$ with $\alpha={}$'.format(alpha))
	    ax.fill_between(x_zone_2, tdist.pdf(x_zone_2), 0, alpha=0.3, color='red')

	    # Observed t statistic.
	    ax.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
	    ax.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130),
	                textcoords='offset points',
	                bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))

	    ax.legend()
	    plt.show()

	def visualize_ttest_pvalue(sample_size, alpha, t, p):
	    """Plot the null t-distribution with rejection zones, the observed t and its p-value tail.

	    Parameters:
	        sample_size: number of observations; the curve uses ``sample_size - 1``
	            degrees of freedom.
	        alpha: significance level; each rejection tail covers ``alpha / 2``.
	        t: observed t statistic, drawn as a dashed vertical line.
	        p: p-value, only used as text in the legend entry of the hatched tail.

	    The figure is displayed with ``plt.show()``; nothing is returned.
	    """
	    fig, ax = plt.subplots(figsize=(12, 4))
	    ax.set_title("Probability distribution of all possible sample means if $H_0$ is true")

	    # Density curve over (almost) the whole support of the distribution.
	    null_dist = stats.t(df=sample_size - 1, loc=0, scale=1)
	    far_left = null_dist.ppf(0.0001)
	    far_right = null_dist.ppf(0.9999)
	    grid = np.linspace(far_left, far_right, 100)
	    ax.plot(grid, null_dist.pdf(grid), color='black', linewidth=1)

	    # Strip everything but the bottom axis line.
	    ax.get_yaxis().set_visible(False)
	    ax.set_ylim(bottom=0)
	    ax.grid(False)
	    for side in ('top', 'right', 'left'):
	        ax.spines[side].set_visible(False)

	    # Shade the lower and upper rejection zones (legend label on the first only).
	    lower_zone = np.linspace(far_left, null_dist.ppf(alpha / 2), 100)
	    upper_zone = np.linspace(null_dist.ppf(1 - alpha / 2), far_right, 100)
	    ax.fill_between(lower_zone, null_dist.pdf(lower_zone), 0, alpha=0.3, color='red',
	                    label=r'rejection of $H_0$ with $\alpha={}$'.format(alpha))
	    ax.fill_between(upper_zone, null_dist.pdf(upper_zone), 0, alpha=0.3, color='red')

	    # Mark the observed statistic.
	    ax.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
	    ax.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130),
	                textcoords='offset points',
	                bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))

	    # Hatch the tail beyond t, on the side where t lies.
	    tail_start, tail_stop = (t, far_right) if t >= 0 else (far_left, t)
	    tail = np.linspace(tail_start, tail_stop, 100)
	    ax.fill_between(tail, null_dist.pdf(tail), 0, facecolor="none", edgecolor="firebrick",
	                    hatch="///", linewidth=0.0, label=r'p-value $p$={:.3f}'.format(p))

	    ax.legend()
	    plt.show()

	def visualize_ttest2():
	    """Placeholder: performs no plotting and always returns ``True``."""
	    return True


	def draw_sample(sample_size, mu=5.552, sigma=0.56068):
	    """Return ``sample_size`` random draws from a normal distribution.

	    Standard-normal values from NumPy's global random state are scaled by
	    ``sigma`` and shifted by ``mu``.

	    Parameters:
	        sample_size: number of values to draw.
	        mu: mean of the distribution.
	        sigma: standard deviation of the distribution.

	    Returns:
	        A 1-D NumPy array of length ``sample_size``.
	    """
	    standard_draws = np.random.randn(sample_size)
	    return mu + sigma * standard_draws

	def plot_sample_histogram(sample, mu, title, color="grey"):
	    """Draw a histogram of ``sample`` with the population and sample means marked.

	    Plots on the current matplotlib figure and adds a legend; it does not call
	    ``plt.show()``.

	    Parameters:
	        sample: sequence of observations.
	        mu: population mean, drawn as a black dash-dotted vertical line.
	        title: name of the sample; used in the plot title and in the legend
	            entry of the sample-mean line.
	        color: colour of the sample-mean line only (the bars are always
	            light grey).
	    """
	    plt.title("Histogram of " + title)
	    plt.hist(sample, color="lightgrey")

	    # Raw string: "\m" is not a valid Python escape and triggers a
	    # SyntaxWarning in a normal string literal. The runtime text is unchanged.
	    plt.axvline(x=mu, color='black', linestyle='-.', linewidth=2,
	                label=r"population mean $\mu$")

	    # Vertical line at the mean of the sample itself.
	    plt.axvline(x=np.mean(sample), color=color, linestyle='-.', linewidth=2,
	                label=title + " mean $m$")
	    plt.legend()


	def _mark_cutoff(ax, tdist, prob, template, text_offset):
	    """Draw a dash-dotted line at the ``prob`` quantile of ``tdist`` and annotate it."""
	    cutoff = tdist.ppf(prob)
	    ax.axvline(x=cutoff, color='firebrick', linestyle='-.', linewidth=1)
	    ax.annotate(template.format(cutoff, prob),
	                xy=(cutoff, tdist.pdf(cutoff)),
	                xytext=text_offset,
	                textcoords='offset points',
	                bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))


	def _shade_between_quantiles(ax, tdist, prob_from, prob_to):
	    """Shade the area under the density of ``tdist`` between two quantiles in red."""
	    xs = np.linspace(tdist.ppf(prob_from), tdist.ppf(prob_to), 100)
	    ax.fill_between(xs, tdist.pdf(xs), 0, alpha=0.3, color='red', zorder=10)


	def plot_t_distribution(df, alpha=None, tail="two", loc=0, scale=1):
	    """Plot a Student t density and, optionally, its rejection zone(s).

	    Parameters:
	        df: degrees of freedom of the distribution.
	        alpha: significance level. When ``None`` only the curve is drawn.
	        tail: ``"two"`` (both tails, ``alpha / 2`` each), ``"lower"`` or
	            ``"upper"``. Any other value draws no cutoff or shading.
	        loc, scale: location and scale passed to ``scipy.stats.t``.

	    Returns:
	        The matplotlib ``Axes`` the distribution was drawn on.
	    """
	    fig, ax = plt.subplots(figsize=(10, 4))

	    plt.title('t distribution for {:} degrees of freedom'.format(df))
	    plt.xlabel('t')
	    plt.ylabel('Probability density')

	    tdist = stats.t(df=df, loc=loc, scale=scale)

	    # 100 x positions between the 0.0001 and 0.9999 quantiles.
	    x = np.linspace(tdist.ppf(0.0001), tdist.ppf(0.9999), 100)
	    ax.plot(x, tdist.pdf(x), color="firebrick", linestyle='-', lw=1, alpha=1)

	    # The first argument used to be spelled ``b=None``; matplotlib renamed
	    # that keyword to ``visible`` and later dropped the old name. Passing it
	    # positionally keeps the same call working on old and new versions.
	    ax.grid(None, which='major', axis='y')
	    ax.set_ylim(bottom=0)

	    if alpha is None:
	        return ax

	    if tail == "two":
	        _mark_cutoff(ax, tdist, alpha / 2,
	                     "Cutoff $t$={:.3f}\nfor $\\alpha/2$={:.3f}", (-80, 10))
	        _mark_cutoff(ax, tdist, 1 - alpha / 2,
	                     "Cutoff $t$={:.3f} \nfor $1-\\alpha/2$={:.3f}", (5, 10))
	        _shade_between_quantiles(ax, tdist, 0.0001, alpha / 2)
	        _shade_between_quantiles(ax, tdist, 1 - alpha / 2, 0.9999)
	    elif tail == "lower":
	        _mark_cutoff(ax, tdist, alpha,
	                     "Cutoff $t$={:.3f} \nfor $\\alpha$={:.3f}", (-80, 10))
	        _shade_between_quantiles(ax, tdist, 0.0001, alpha)
	    elif tail == "upper":
	        _mark_cutoff(ax, tdist, 1 - alpha,
	                     "Cutoff $t$={:.3f}\nfor $1-\\alpha$={:.3f}", (5, 10))
	        _shade_between_quantiles(ax, tdist, 1 - alpha, 0.9999)

	    return ax

	def plot_t_test(sample, mu, alpha=None, tail="two"):
	    """Run a one-sample t-test of ``sample`` against ``mu`` and draw the result.

	    The t-distribution (with rejection zones when ``alpha`` is given) is drawn
	    by ``plot_t_distribution``. On top of it this function hatches the area
	    that corresponds to the p-value, marks the observed t with a vertical line
	    and annotates it with the t and p values.

	    Parameters:
	        sample: sequence of observations.
	        mu: population mean under the null hypothesis.
	        alpha: significance level, or ``None`` to draw no rejection zone.
	        tail: ``"two"``, ``"lower"`` or ``"upper"``.

	    Returns nothing; the drawing happens on the axes created by
	    ``plot_t_distribution``.
	    """
	    df = np.size(sample) - 1

	    ax = plot_t_distribution(df, alpha=alpha, tail=tail)
	    t, p = stats.ttest_1samp(sample, mu)

	    tdist = stats.t(df=df, loc=0, scale=1)

	    # Height of the density at the cutoff, used further down to keep the
	    # t annotation clear of the cutoff annotation. Without alpha there is no
	    # cutoff (the previous code raised TypeError on ``alpha / 2`` here).
	    cutoff_y = None
	    if alpha is not None:
	        cutoff_prob = alpha / 2 if tail == "two" else alpha
	        cutoff_y = tdist.pdf(tdist.ppf(cutoff_prob))

	    # ttest_1samp returned a two-sided p-value; convert it for one-tailed
	    # tests. t == 0 now falls in the ``1 - p/2`` branch (0.5) instead of being
	    # left at the two-sided value.
	    if tail == "lower":
	        p = p / 2 if t < 0 else 1 - p / 2
	    elif tail == "upper":
	        p = p / 2 if t > 0 else 1 - p / 2

	    two_tailed = tail == "two"
	    hatch_style = dict(facecolor="none", edgecolor="firebrick", hatch="///", linewidth=0.0,
	                       label=r'p-value $p$={:.3f}'.format(p))

	    # Upper hatched tail: starts at |t| for a two-tailed test, at t otherwise.
	    if two_tailed or tail == "upper":
	        upper_start = -t if (two_tailed and t < 0) else t
	        x_t = np.linspace(upper_start, tdist.ppf(0.9999), 100)
	        ax.fill_between(x_t, tdist.pdf(x_t), 0, **hatch_style)

	    # Lower hatched tail: ends at -|t| for a two-tailed test, at t otherwise.
	    if two_tailed or tail == "lower":
	        lower_end = -t if (two_tailed and t >= 0) else t
	        x_t = np.linspace(tdist.ppf(0.0001), lower_end, 100)
	        ax.fill_between(x_t, tdist.pdf(x_t), 0, **hatch_style)

	    ax.axvline(x=t, color='firebrick', linestyle='-', linewidth=1)

	    if two_tailed:
	        # Close the hatched area on the side opposite to t with a short
	        # vertical segment from the axis up to the curve.
	        ax.plot([-t, -t], [0, tdist.pdf(-t)], color="firebrick", linestyle='-', linewidth=1)

	    # Layout heuristic kept from the original: the annotation is anchored at
	    # a height equal to the p-value, capped at 0.3.
	    if p < 0.3:
	        annotation_y = p
	    else:
	        annotation_y = 0.3

	    # Nudge the label when it would sit on top of the cutoff annotation.
	    annotation_offset = 0
	    if cutoff_y is not None and np.abs(cutoff_y - annotation_y) < 0.05:
	        annotation_offset = 25 if annotation_y > cutoff_y else -5

	    ax.annotate('t-test $t$={:.3f}, p={:.3f}'.format(t, p),
	                xy=(t, annotation_y),
	                xytext=(5, annotation_offset),
	                textcoords='offset points',
	                bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))




	def plot_mean_distribution(mu, alpha=None, means=None, sample_size=50):
	    """Plot the distribution of sample means around a population mean ``mu``.

	    Without ``means`` a theoretical t density is drawn. With ``means`` a
	    100-bin histogram of the given values is drawn and a t density, rescaled
	    to the histogram counts, is overlaid. When ``alpha`` is given, the
	    two-tailed rejection zones are shaded via ``add_rejection_zones``.

	    Parameters:
	        mu: population mean; centre of the t density.
	        alpha: significance level, or ``None`` for no rejection zones.
	        means: optional sequence (list or NumPy array) of sample means.
	        sample_size: size of each sample; the density uses
	            ``sample_size - 1`` degrees of freedom.
	    """
	    plt.figure(figsize=(8, 4))

	    # Raw string: "\m" is an invalid escape in a normal string literal.
	    plt.title(r'Distribution of sample means for a population with $\mu$= {:}'.format(mu))
	    plt.xlabel('Mean of samples')
	    plt.ylabel('Count')

	    loc = mu
	    df = sample_size - 1

	    # ``is None`` rather than ``== None``: comparing a NumPy array with ==
	    # is element-wise and cannot be used as an ``if`` condition.
	    if means is None:
	        # NOTE(review): the x range and the scale are fixed constants,
	        # presumably tuned to the default dataset (mu around 5.55) - confirm.
	        x = np.linspace(5.2, 5.9, 100)
	        scale = 0.0796
	        scale_factor = 1.0

	        density = stats.t.pdf(x, df, loc=mu, scale=scale)
	        frame1 = plt.gca()
	        frame1.plot(x, density, color='black', linestyle='-', lw=1, alpha=1)
	        frame1.set_ylim([0, 6])
	        frame1.axes.get_yaxis().set_visible(False)

	        mu_label_y = 5.5
	    else:
	        # Histogram of the provided means.
	        (nn, bb, pp) = plt.hist(means, color="lightgrey", bins=100, density=False)

	        # One x position per bin (the right edge of each bin). The original
	        # wrote ``len(bb-1)``, which equals ``len(bb)`` and produced one
	        # point too many for the count normalisation below.
	        x = np.linspace(bb[1], bb[-1], len(bb) - 1)

	        frame1 = plt.gca()
	        scale = np.std(means, ddof=1)

	        # Rescale the density so that its values sum to the number of means,
	        # i.e. to the total of the histogram counts.
	        density = stats.t.pdf(x, df, loc=mu, scale=scale)
	        scale_factor = np.size(means) / np.sum(density)
	        density = density * scale_factor

	        plt.plot(x, density, color='black', linestyle='-', lw=1, alpha=1)

	        frame1.axes.get_yaxis().set_visible(False)
	        frame1.set_ylim(bottom=0)

	        mu_label_y = np.max(nn)

	    # Population mean: black dash-dotted line plus a boxed label.
	    plt.axvline(x=mu, color='black', linestyle='-.', linewidth=2)
	    frame1.annotate(r"$\mu$={:.3f}".format(mu),
	                    xy=(mu, mu_label_y),
	                    xytext=(-20, 0),
	                    textcoords='offset points',
	                    bbox=dict(boxstyle="round", facecolor="white", edgecolor="black", alpha=0.8))

	    if alpha is not None:
	        add_rejection_zones(frame1, loc, scale, df, alpha, scale_factor)

	def add_rejection_zones(ax, loc, scale, df, alpha, scale_factor):
	    """Shade both two-tailed rejection zones of a t-distribution on ``ax``.

	    Parameters:
	        ax: object with a matplotlib-style ``fill_between`` method.
	        loc, scale, df: parameters of the ``scipy.stats.t`` distribution.
	        alpha: significance level; each tail covers ``alpha / 2``.
	        scale_factor: multiplier applied to the density values before shading.
	    """
	    dist = stats.t(df=df, loc=loc, scale=scale)
	    half_alpha = alpha / 2

	    # Lower tail first, then upper tail; each is sampled at 100 points between
	    # the given pair of quantiles.
	    for p_start, p_stop in ((0.0001, half_alpha), (1 - half_alpha, 0.9999)):
	        xs = np.linspace(dist.ppf(p_start), dist.ppf(p_stop), 100)
	        ax.fill_between(xs, dist.pdf(xs) * scale_factor, 0, alpha=0.3, color='red', zorder=10)


	def plot_n_and_t(sample_mean, sample_std, mu, from_n, to_n, step_n, plotp=False, df=49, alpha=0.05):
	    """Plot how |t| (and optionally the p-value) changes with the sample size.

	    For every sample size ``n`` in ``np.arange(from_n, to_n, step_n)`` the
	    one-sample t statistic is computed as if a sample of size ``n`` had the
	    given ``sample_mean`` and ``sample_std``. The two-tailed critical values
	    for alpha = 0.05 and alpha = 0.01 are drawn as horizontal reference lines.

	    Parameters:
	        sample_mean: mean of the observed sample.
	        sample_std: standard deviation of the observed sample.
	        mu: population mean under the null hypothesis.
	        from_n, to_n, step_n: range of sample sizes (``to_n`` excluded).
	        plotp: when true, also plot the two-sided p-value on a second y axis.
	        df: degrees of freedom used for the two critical-value lines.
	        alpha: accepted for backward compatibility; the reference lines are
	            always drawn for 0.05 and 0.01.
	    """
	    # t statistic for every hypothetical sample size.
	    nvalues = np.arange(from_n, to_n, step_n)
	    tvalues = (sample_mean - mu) / (sample_std / np.sqrt(nvalues))

	    # Two-sided p-value = twice the upper-tail probability of |t| with
	    # n - 1 degrees of freedom. The previous code used ``stats.t.pdf``, which
	    # is the density at t and not a probability.
	    pvalues = 2 * stats.t.sf(np.abs(tvalues), nvalues - 1)

	    fig, ax = plt.subplots(figsize=(8, 4))

	    # The labels used to contain a backslash before each bar, which
	    # rendered a double bar in the title and literal backslashes on the y axis.
	    plt.title('$|t|$ as a function of sample size (from {:} to {:})'.format(from_n, to_n))
	    ax.set_xlabel('Sample size', color="black")
	    ax.set_ylabel('|t|', color="firebrick")
	    ax.tick_params(axis='y', labelcolor="firebrick")

	    # One minor tick (and dotted grid line) per plotted sample size.
	    ax.xaxis.set_minor_locator(MultipleLocator(step_n))
	    ax.grid(which='minor', axis='x', linestyle='dotted')
	    ax.grid(which='major', axis='x', linestyle='-')
	    # First argument passed positionally: its keyword was renamed from ``b``
	    # to ``visible`` and the old name no longer exists in current matplotlib.
	    ax.grid(None, which='major', axis='y')

	    ax.plot(nvalues, np.abs(tvalues), color="firebrick")

	    # Two-tailed critical values computed from ``df`` so that the line and
	    # its label always agree (they were hard-coded and inconsistent before).
	    label_box = dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8)
	    for level in (0.05, 0.01):
	        cutoff = stats.t.ppf(1 - level / 2, df)
	        # The sketch style is read when the line is created; rc_context
	        # restores whatever value was configured before.
	        with plt.rc_context({'path.sketch': (1, 100, 2)}):
	            ax.axhline(y=cutoff, color='firebrick', linestyle='-.', linewidth=1)
	        ax.annotate(r'cutoff $t_{{\alpha={}}} \approx {:.2f}$'.format(level, cutoff),
	                    xy=(from_n, cutoff), xytext=(10, 0), textcoords='offset points',
	                    bbox=label_box)

	    if plotp:
	        ax2 = ax.twinx()

	        # p-values for the same sample sizes, on their own y axis.
	        ax2.plot(nvalues, pvalues, color="blue")

	        ax2.set_xlabel("Sample Size")
	        ax2.set_ylabel('p-value', color="blue")
	        ax2.tick_params(axis='y', labelcolor="blue")
	        ax2.grid(which='major', axis='x', linestyle='-')

	        p_box = dict(boxstyle="round", facecolor="white", edgecolor="blue", alpha=0.8)
	        for level in (0.05, 0.01):
	            ax2.axhline(y=level, color='blue', linestyle='-.', linewidth=1)
	            ax2.annotate(r'$\alpha = {}$'.format(level), xy=(to_n, level), xytext=(-40, 0),
	                         textcoords='offset points', bbox=p_box)



	def one_sample_one_tailed(sample_data, mu, alpha=0.05, alternative='greater'):
	    """Run a one-tailed one-sample t-test and print the outcome.

	    Prints the t statistic and the one-tailed p-value, followed by a rejection
	    message when the null hypothesis is rejected at level ``alpha``.

	    Parameters:
	        sample_data: sequence of observations.
	        mu: population mean under the null hypothesis.
	        alpha: significance level.
	        alternative: ``'greater'`` (sample mean > mu) or ``'less'``
	            (sample mean < mu). Other values print t and p only.

	    Returns nothing.
	    """
	    t, p = stats.ttest_1samp(sample_data, mu)

	    # ttest_1samp returned the two-sided p-value. The one-tailed value is
	    # half of it when t lies on the side of the alternative and 1 - p/2
	    # otherwise. The previous code doubled p instead of halving it.
	    half = p / 2
	    if alternative == 'greater':
	        p = half if t > 0 else 1 - half
	    elif alternative == 'less':
	        p = half if t < 0 else 1 - half
	    else:
	        p = half

	    print('t:', t)
	    print('p:', p)
	    if alternative == 'greater' and p < alpha and t > 0:
	        print('Reject Null Hypothesis for greater-than test')
	    if alternative == 'less' and p < alpha and t < 0:
	        print('Reject Null Hypothesis for less-than test')


	###### TO DELETE

	def build_ttest_visualization(ttest_result, alpha):
	    """Draw a two-panel summary of a t-test: test result (left) and effect size (right).

	    Parameters:
	        ttest_result: table indexed with ``.loc`` that has a ``"T-test"`` row
	            and the columns ``"dof"``, ``"T"``, ``"p-val"`` and ``"cohen-d"``
	            (presumably a pandas DataFrame - confirm against the caller).
	        alpha: significance level; each rejection tail covers ``alpha / 2``.

	    The figure is displayed with ``plt.show()``; nothing is returned.
	    """
	    def read(column):
	        # Value of one column of the "T-test" row.
	        return ttest_result.loc["T-test", column]

	    def strip_frame(axes):
	        # Hide the y axis, the grid and every spine except the bottom one.
	        axes.get_yaxis().set_visible(False)
	        axes.set_ylim(bottom=0)
	        axes.grid(False)
	        for side in ('top', 'right', 'left'):
	            axes.spines[side].set_visible(False)

	    def density_curve(dist):
	        # 100 points between the 0.0001 and 0.9999 quantiles, with densities.
	        grid = np.linspace(dist.ppf(0.0001), dist.ppf(0.9999), 100)
	        return grid, dist.pdf(grid)

	    n = round(read("dof"))
	    t = read("T")
	    p = read("p-val")
	    d = read("cohen-d")

	    fig = plt.figure(figsize=(14, 4))

	    # ---- Left panel: the t-test itself ----
	    ax1 = plt.subplot(1, 2, 1)
	    ax1.set_title("Result of the t-test")

	    tdist = stats.t(df=n, loc=0, scale=1)
	    far_left = tdist.ppf(0.0001)
	    far_right = tdist.ppf(0.9999)
	    x, y = density_curve(tdist)
	    ax1.plot(x, y, color='black', linewidth=1)

	    strip_frame(ax1)

	    # Two-tailed rejection zones (legend label on the first only).
	    lower_zone = np.linspace(far_left, tdist.ppf(alpha / 2), 100)
	    upper_zone = np.linspace(tdist.ppf(1 - alpha / 2), far_right, 100)
	    ax1.fill_between(lower_zone, tdist.pdf(lower_zone), 0, alpha=0.3, color='red',
	                     label=r'threshold $\alpha={}$'.format(alpha))
	    ax1.fill_between(upper_zone, tdist.pdf(upper_zone), 0, alpha=0.3, color='red')

	    # Observed t statistic.
	    ax1.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
	    ax1.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130),
	                 textcoords='offset points',
	                 bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))

	    # Hatched tail beyond t, on the side where t lies.
	    tail_start, tail_stop = (t, far_right) if t >= 0 else (far_left, t)
	    x_t = np.linspace(tail_start, tail_stop, 100)
	    ax1.fill_between(x_t, tdist.pdf(x_t), 0, facecolor="none", edgecolor="firebrick",
	                     hatch="///", linewidth=0.0, label=r'p-value $p$={:.3f}'.format(p))

	    ax1.legend(loc='upper right')

	    # ---- Right panel: effect size ----
	    ax2 = plt.subplot(1, 2, 2)
	    ax2.set_title("Effect size")

	    # First sample: standard normal centred on 0.
	    x, y = density_curve(stats.norm(loc=0, scale=1))
	    ax2.plot(x, y, color='black', alpha=0.3, linewidth=1)
	    ax2.fill_between(x, y, 0, color='blue', alpha=0.3, label='Year 1 (theoretical)')
	    ax2.axvline(x=0, color='blue', alpha=0.5, linestyle='dashed', linewidth=1)

	    # Second sample: same curve shifted by d, to the left when t is positive.
	    loc_d = -d if t > 0 else d
	    x_d, y_d = density_curve(stats.norm(loc=loc_d, scale=1))
	    ax2.plot(x_d, y_d, color='black', alpha=0.3, linewidth=1)
	    ax2.fill_between(x_d, y_d, 0, color='green', alpha=0.3, label='Year 2 (theoretical)')
	    ax2.axvline(x=loc_d, color='green', alpha=0.5, linestyle='dashed', linewidth=1)

	    # Horizontal segment between the two centres, labelled with Cohen's d.
	    max_y = np.max(y) + .02
	    ax2.plot([0, loc_d], [max_y, max_y], color='red', linewidth=1, marker=".")
	    ax2.annotate("effect size $d$={:.3f}".format(d), xy=(loc_d, max_y), xytext=(15, -5),
	                 textcoords='offset points',
	                 bbox=dict(boxstyle="round", facecolor="white", edgecolor="red", alpha=0.8))

	    strip_frame(ax2)

	    ax2.legend(loc='upper left')

	    plt.subplots_adjust(wspace=.1)
	    plt.show()


	# EOF

dataanalysis.pyNo OneTemporaryActions

File Metadata

dataanalysis.pyView Options

Event Timeline

dataanalysis.py
No OneTemporary
Actions

dataanalysis.py
View Options