Page MenuHomec4science

dataanalysis.py
No OneTemporary

File Metadata

Created
Tue, Apr 16, 06:38

dataanalysis.py

import numpy as np
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import HBox, VBox, Label, Layout
import ipywidgets as widgets
from IPython.display import IFrame
from IPython.display import set_matplotlib_formats, display, Math, Markdown, Latex, HTML
# Render figures as SVG in the notebook
set_matplotlib_formats('svg')
# Select the inline matplotlib backend (must run inside an IPython session)
from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import matplotlib.patches as pat
import matplotlib.ticker as ticker
# Global style for plotting. Matplotlib 3.6 renamed the bundled seaborn styles
# to 'seaborn-v0_8-*' and later releases dropped the old names, so use
# whichever spelling this installation provides.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
else:
    # Neither spelling is registered: keep the original call so the problem
    # is reported exactly as before instead of being silently ignored.
    plt.style.use('seaborn-whitegrid')
from matplotlib.ticker import MultipleLocator
import scipy.stats as stats
def visualize_ttest(sample_size, alpha, t):
    """Show the t distribution under H0, its two-tailed rejection zones and the observed t.

    Parameters
    ----------
    sample_size : int
        Number of observations in the sample. The curve is drawn with
        ``sample_size - 1`` degrees of freedom, the same convention as
        ``visualize_ttest_pvalue``.
    alpha : float
        Significance level; ``alpha / 2`` is shaded in each tail.
    t : float
        Observed t statistic, drawn as a dashed vertical line.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Create the t-test visualization
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.set_title("Probability distribution of all possible sample means if $H_0$ is true")

    # Plot the t distribution for this sample size (n - 1 degrees of freedom)
    tdist = stats.t(df=sample_size - 1, loc=0, scale=1)
    lowest = tdist.ppf(0.0001)
    highest = tdist.ppf(0.9999)
    x = np.linspace(lowest, highest, 100)
    ax.plot(x, tdist.pdf(x), color='black', linewidth=1)

    # Polish the look of the graph
    ax.get_yaxis().set_visible(False)  # hide the y axis
    ax.set_ylim(bottom=0)
    ax.grid(False)  # hide the grid
    for side in ('top', 'right', 'left'):  # hide the frame except bottom line
        ax.spines[side].set_visible(False)

    # Plot the two-tailed rejection zone; only the first zone carries the
    # legend label so the legend shows a single entry.
    x_zone_1 = np.linspace(lowest, tdist.ppf(alpha / 2), 100)
    x_zone_2 = np.linspace(tdist.ppf(1 - alpha / 2), highest, 100)
    ax.fill_between(x_zone_1, tdist.pdf(x_zone_1), 0, alpha=0.3, color='red',
                    label=r'rejection of $H_0$ with $\alpha={}$'.format(alpha))
    ax.fill_between(x_zone_2, tdist.pdf(x_zone_2), 0, alpha=0.3, color='red')

    # Plot the t-test statistic
    ax.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
    ax.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130),
                textcoords='offset points',
                bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))

    # Add a legend and display the graph
    ax.legend()
    plt.show()
def visualize_ttest_pvalue(sample_size, alpha, t, p):
    """Draw the H0 t distribution with rejection zones, the observed t and a hatched p-value area.

    The curve uses ``sample_size - 1`` degrees of freedom. ``alpha / 2`` is
    shaded in each tail, ``t`` is marked with a dashed line, and the tail
    beyond ``t`` (right of it when ``t >= 0``, left of it otherwise) is
    hatched and labelled with ``p``. The figure is shown with ``plt.show()``.
    """
    figure, axis = plt.subplots(figsize=(12, 4))
    axis.set_title("Probability distribution of all possible sample means if $H_0$ is true")

    # t distribution for this sample size
    dist = stats.t(df=sample_size-1, loc=0, scale=1)
    left_end = dist.ppf(0.0001)
    right_end = dist.ppf(0.9999)
    grid_x = np.linspace(left_end, right_end, 100)
    axis.plot(grid_x, dist.pdf(grid_x), color='black', linewidth=1)

    # Cosmetics: no y axis, no grid, only the bottom spine stays visible
    axis.get_yaxis().set_visible(False)
    axis.set_ylim(bottom=0)
    axis.grid(False)
    for side in ('top', 'right', 'left'):
        axis.spines[side].set_visible(False)

    # Two-tailed rejection zones
    lower_zone = np.linspace(left_end, dist.ppf(alpha/2), 100)
    upper_zone = np.linspace(dist.ppf(1-alpha/2), right_end, 100)
    lower_density = dist.pdf(lower_zone)
    upper_density = dist.pdf(upper_zone)
    axis.fill_between(lower_zone, lower_density, 0, alpha=0.3, color='red',
                      label=r'rejection of $H_0$ with $\alpha={}$'.format(alpha))
    axis.fill_between(upper_zone, upper_density, 0, alpha=0.3, color='red')

    # Observed statistic
    axis.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
    label_box = dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8)
    axis.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130),
                  textcoords='offset points', bbox=label_box)

    # Hatched area for the p-value, on the side of t away from the centre
    if t >= 0:
        tail_x = np.linspace(t, right_end, 100)
    else:
        tail_x = np.linspace(left_end, t, 100)
    axis.fill_between(tail_x, dist.pdf(tail_x), 0, facecolor="none", edgecolor="firebrick",
                      hatch="///", linewidth=0.0, label=r'p-value $p$={:.3f}'.format(p))

    axis.legend()
    plt.show()
def visualize_ttest2():
    """Return ``True`` unconditionally; takes no arguments and draws nothing."""
    return True
def draw_sample(sample_size, mu=5.552, sigma=0.56068):
    """Draw ``sample_size`` normally distributed values with mean ``mu`` and spread ``sigma``.

    The values come from NumPy's global random state (``np.random.randn``),
    so seeding with ``np.random.seed`` makes the result reproducible.
    Returns a 1-D array of length ``sample_size``.
    """
    standard_draws = np.random.randn(sample_size)
    # Shift and scale the standard-normal draws
    return mu + standard_draws * sigma
def plot_sample_histogram(sample, mu, title, color="grey"):
    """Plot a histogram of ``sample`` with the population and sample means marked.

    Parameters
    ----------
    sample : array-like
        Observations to bin.
    mu : float
        Population mean, drawn as a black dash-dot vertical line.
    title : str
        Name of the sample; used in the plot title and in the legend entry
        of the sample-mean line.
    color : str, optional
        Colour of the sample-mean line (the bars are always light grey).

    Draws on the current pyplot axes and adds a legend; nothing is returned.
    """
    plt.title("Histogram of " + title)
    plt.hist(sample, color="lightgrey")
    # Vertical line for the population mean. The label is a raw string because
    # "\m" is not a valid escape sequence and triggers a SyntaxWarning on
    # recent Python versions; the text that reaches matplotlib is unchanged.
    plt.axvline(x=mu, color='black', linestyle='-.', linewidth=2,
                label=r"population mean $\mu$")
    # Vertical line for the sample mean
    plt.axvline(x=np.mean(sample), color=color, linestyle='-.', linewidth=2,
                label=title + " mean $m$")
    plt.legend()
def plot_t_distribution(df, alpha=None, tail="two", loc=0, scale=1):
    """Plot the density of a t distribution and, optionally, its rejection zone(s).

    Parameters
    ----------
    df : int or float
        Degrees of freedom of the distribution.
    alpha : float or None, optional
        Significance level. When ``None`` only the density curve is drawn.
    tail : {"two", "lower", "upper"}, optional
        Which tail(s) to mark when ``alpha`` is given. ``"two"`` puts
        ``alpha / 2`` in each tail; ``"lower"`` / ``"upper"`` put the whole
        ``alpha`` in one tail. Any other value draws no rejection zone.
    loc, scale : float, optional
        Location and scale passed to ``scipy.stats.t``.

    Returns
    -------
    The matplotlib axes the curve was drawn on.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    plt.title('t distribution for {:} degrees of freedom'.format(df))
    plt.xlabel('t')
    plt.ylabel('Probability density')

    tdist = stats.t(df=df, loc=loc, scale=scale)
    # The curve spans the 0.0001 to 0.9999 quantiles of the distribution
    lowest = tdist.ppf(0.0001)
    highest = tdist.ppf(0.9999)
    x = np.linspace(lowest, highest, 100)
    ax.plot(x, tdist.pdf(x), color="firebrick", linestyle='-', lw=1, alpha=1)

    # The first argument of Axes.grid was called ``b`` and is now ``visible``;
    # passing None positionally works with either name and keeps the original
    # behaviour (toggle the major y grid lines).
    ax.grid(None, which='major', axis='y')
    ax.set_ylim(bottom=0)

    def mark_cutoff(probability, template, text_offset):
        """Draw and annotate the cutoff at the given cumulative probability."""
        cutoff = tdist.ppf(probability)
        ax.axvline(x=cutoff, color='firebrick', linestyle='-.', linewidth=1)
        ax.annotate(template.format(cutoff, probability),
                    xy=(cutoff, tdist.pdf(cutoff)),
                    xytext=text_offset,
                    textcoords='offset points',
                    bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))
        return cutoff

    def shade(start, stop):
        """Fill the area under the curve between two t values."""
        zone = np.linspace(start, stop, 100)
        ax.fill_between(zone, tdist.pdf(zone), 0, alpha=0.3, color='red', zorder=10)

    if alpha is not None:
        if tail == "two":
            low_cutoff = mark_cutoff(alpha/2, "Cutoff $t$={:.3f}\nfor $\\alpha/2$={:.3f}", (-80, 10))
            high_cutoff = mark_cutoff(1-alpha/2, "Cutoff $t$={:.3f} \nfor $1-\\alpha/2$={:.3f}", (5, 10))
            shade(lowest, low_cutoff)
            shade(high_cutoff, highest)
        elif tail == "lower":
            low_cutoff = mark_cutoff(alpha, "Cutoff $t$={:.3f} \nfor $\\alpha$={:.3f}", (-80, 10))
            shade(lowest, low_cutoff)
        elif tail == "upper":
            high_cutoff = mark_cutoff(1-alpha, "Cutoff $t$={:.3f}\nfor $1-\\alpha$={:.3f}", (5, 10))
            shade(high_cutoff, highest)
    return ax
def plot_t_test(sample, mu, alpha=None, tail="two"):
    """Run a one-sample t-test of ``sample`` against ``mu`` and draw the result.

    The t distribution (``len(sample) - 1`` degrees of freedom) is drawn with
    ``plot_t_distribution``; the observed t is marked with a vertical line,
    the area corresponding to the p-value is hatched and an annotation shows
    t and p.

    Parameters
    ----------
    sample : array-like
        Observations.
    mu : float
        Population mean under the null hypothesis.
    alpha : float or None, optional
        Significance level forwarded to ``plot_t_distribution``. With
        ``None`` no rejection zone is drawn (previously this default raised
        a ``TypeError`` on ``alpha / 2``).
    tail : {"two", "lower", "upper"}, optional
        Kind of test. For one-tailed tests the two-sided p-value returned by
        ``scipy.stats.ttest_1samp`` is converted to the one-sided value.
    """
    df = np.size(sample) - 1
    ax = plot_t_distribution(df, alpha=alpha, tail=tail)
    t, p = stats.ttest_1samp(sample, mu)

    tdist = stats.t(df=df, loc=0, scale=1)
    lowest = tdist.ppf(0.0001)
    highest = tdist.ppf(0.9999)

    # Height of the curve at the cutoff, used below to keep the t annotation
    # away from the cutoff annotation. Only defined when a cutoff is drawn.
    cutoff_y = None
    if alpha is not None:
        cutoff = tdist.ppf(alpha/2) if tail == "two" else tdist.ppf(alpha)
        cutoff_y = tdist.pdf(cutoff)

    # Convert the two-sided p-value to a one-sided one. The ``else`` arms
    # also cover t == 0, which was previously left unconverted.
    if tail == "lower":
        p = p/2 if t < 0 else 1 - p/2
    elif tail == "upper":
        p = p/2 if t > 0 else 1 - p/2

    p_label = r'p-value $p$={:.3f}'.format(p)

    def hatch(start, stop):
        """Hatch the area under the curve between two t values."""
        zone = np.linspace(start, stop, 100)
        ax.fill_between(zone, tdist.pdf(zone), 0, facecolor="none", edgecolor="firebrick",
                        hatch="///", linewidth=0.0, label=p_label)

    # Plot the p-value: both tails beyond |t| for a two-tailed test,
    # otherwise the single tail beyond t in the tested direction.
    if tail == "two":
        hatch(abs(t), highest)
        hatch(lowest, -abs(t))
    elif tail == "upper":
        hatch(t, highest)
    elif tail == "lower":
        hatch(lowest, t)

    ax.axvline(x=t, color='firebrick', linestyle='-', linewidth=1)
    if tail == "two":
        # Small vertical segment closing the hatching on the side opposite to t
        ax.plot([-t, -t], [0, tdist.pdf(-t)], color="firebrick", linestyle='-', linewidth=1)

    # The annotation is placed at height p, capped at 0.3
    annotation_y = p if p < 0.3 else 0.3
    annotation_offset = 0
    # Nudge the annotation when it would overlap the cutoff annotation
    if cutoff_y is not None and np.abs(cutoff_y - annotation_y) < 0.05:
        annotation_offset = 25 if annotation_y > cutoff_y else -5
    ax.annotate('t-test $t$={:.3f}, p={:.3f}'.format(t, p),
                xy=(t, annotation_y),
                xytext=(5, annotation_offset),
                textcoords='offset points',
                bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))
def plot_mean_distribution(mu, alpha=None, means=None, sample_size=50):
    """Plot the distribution of sample means around the population mean ``mu``.

    Parameters
    ----------
    mu : float
        Population mean; marked with a black dash-dot line and an annotation.
    alpha : float or None, optional
        When given, the two-tailed rejection zones for this significance
        level are shaded with ``add_rejection_zones``.
    means : sequence or array of float, or None, optional
        Observed sample means. When ``None`` only a theoretical t density is
        drawn, over the hard-coded x range 5.2 to 5.9 with the hard-coded
        scale 0.0796. Otherwise a 100-bin histogram of ``means`` is drawn
        with a t density rescaled to the histogram counts on top of it.
    sample_size : int, optional
        Size of each sample; the t density uses ``sample_size - 1`` degrees
        of freedom.
    """
    plt.figure(figsize=(8, 4))
    # Raw strings: "\m" is not a valid escape sequence in a normal literal
    plt.title(r'Distribution of sample means for a population with $\mu$= {:}'.format(mu))
    plt.xlabel('Mean of samples')
    plt.ylabel('Count')
    frame1 = plt.gca()

    loc = mu
    df = sample_size - 1

    # ``is None`` rather than ``== None``: the comparison must also work when
    # ``means`` is a numpy array (``array == None`` is elementwise and has no
    # single truth value).
    if means is None:
        x = np.linspace(5.2, 5.9, 100)
        scale = 0.0796
        scale_factor = 1.0
        density = stats.t.pdf(x, df, loc=loc, scale=scale)
        frame1.plot(x, density, color='black', linestyle='-', lw=1, alpha=1)
        frame1.set_ylim([0, 6])
        frame1.axes.get_yaxis().set_visible(False)
        mu_label_y = 5.5
    else:
        # Histogram of the observed means
        counts, edges, _patches = plt.hist(means, color="lightgrey", bins=100, density=False)
        # Positions at which the t density is evaluated.
        # NOTE(review): the original wrote ``len(bb-1)``, which equals
        # ``len(bb)``, and starts at the second bin edge; ``len(bb) - 1``
        # points starting at the first edge may have been intended. The
        # values are kept as they were.
        x = np.linspace(edges[1], edges[len(edges)-1], len(edges))
        scale = np.std(means, ddof=1)
        density = stats.t.pdf(x, df, loc=loc, scale=scale)
        # Rescale the density so that its values sum to the number of means
        scale_factor = np.size(means) / np.sum(density)
        plt.plot(x, density * scale_factor, color='black', linestyle='-', lw=1, alpha=1)
        frame1.axes.get_yaxis().set_visible(False)
        frame1.set_ylim(bottom=0)
        mu_label_y = np.max(counts)

    # In black the theoretical population mean mu, with its value
    plt.axvline(x=mu, color='black', linestyle='-.', linewidth=2)
    frame1.annotate(r"$\mu$={:.3f}".format(mu),
                    xy=(mu, mu_label_y),
                    xytext=(-20, 0),
                    textcoords='offset points',
                    bbox=dict(boxstyle="round", facecolor="white", edgecolor="black", alpha=0.8))

    if alpha is not None:
        add_rejection_zones(frame1, loc, scale, df, alpha, scale_factor)
def add_rejection_zones(ax, loc, scale, df, alpha, scale_factor):
    """Shade the two-tailed rejection zones of a t distribution on ``ax``.

    The distribution is ``scipy.stats.t(df, loc, scale)``. Each tail holds
    ``alpha / 2`` and is filled between the x axis and the density multiplied
    by ``scale_factor``. The zones extend out to the 0.0001 and 0.9999
    quantiles.
    """
    dist = stats.t(df=df, loc=loc, scale=scale)
    tail_bounds = (
        (dist.ppf(0.0001), dist.ppf(alpha/2)),      # lower tail
        (dist.ppf(1-alpha/2), dist.ppf(0.9999)),    # upper tail
    )
    for start, stop in tail_bounds:
        zone_x = np.linspace(start, stop, 100)
        zone_y = dist.pdf(zone_x) * scale_factor
        ax.fill_between(zone_x, zone_y, 0, alpha=0.3, color='red', zorder=10)
def plot_n_and_t(sample_mean, sample_std, mu, from_n, to_n, step_n, plotp=False, df=49, alpha=0.05):
    """Plot how |t| (and optionally the p-value) changes with the sample size.

    For every sample size ``n`` in ``np.arange(from_n, to_n, step_n)`` the
    one-sample t statistic is computed as
    ``(sample_mean - mu) / (sample_std / sqrt(n))``, i.e. as if the same mean
    and standard deviation had been observed on ``n`` values.

    Parameters
    ----------
    sample_mean, sample_std : float
        Mean and standard deviation of the observed sample.
    mu : float
        Population mean under the null hypothesis.
    from_n, to_n, step_n : int
        Range of sample sizes (``to_n`` excluded); ``step_n`` is also the
        spacing of the minor x ticks.
    plotp : bool, optional
        When true, the two-sided p-value for each ``n`` is drawn on a second
        y axis together with reference lines at 0.05 and 0.01.
    df : int, optional
        Degrees of freedom used for the two horizontal cutoff lines
        (two-tailed critical t for 0.05 and 0.01).
    alpha : float, optional
        Accepted for backward compatibility; the reference levels drawn are
        fixed at 0.05 and 0.01.
    """
    # t statistic for each hypothetical sample size
    nvalues = np.arange(from_n, to_n, step_n)
    tvalues = (sample_mean - mu) / (sample_std / np.sqrt(nvalues))
    # Two-sided p-value with n - 1 degrees of freedom. The survival function
    # gives the tail probability; the density (pdf) is not a p-value.
    pvalues = 2 * stats.t.sf(np.abs(tvalues), nvalues - 1)

    # Plot the relation between sample size and |t|
    fig, ax = plt.subplots(figsize=(8, 4))
    plt.title('$|t|$ as a function of sample size (from {:} to {:})'.format(from_n, to_n))
    ax.set_xlabel('Sample size', color="black")
    ax.set_ylabel('|t|', color="firebrick")
    ax.tick_params(axis='y', labelcolor="firebrick")

    # One minor x tick per sample-size step, with a dotted grid on them
    ax.xaxis.set_minor_locator(MultipleLocator(step_n))
    ax.grid(which='minor', axis='x', linestyle='dotted')
    ax.grid(which='major', axis='x', linestyle='-')
    # First argument passed positionally: it was named ``b`` in older
    # Matplotlib and ``visible`` in newer releases; None toggles the grid.
    ax.grid(None, which='major', axis='y')

    ax.plot(nvalues, np.abs(tvalues), color="firebrick")

    reference = stats.t(df=df, loc=0, scale=1)

    def draw_cutoff(level):
        """Draw the two-tailed critical |t| for ``level`` as a sketchy horizontal line."""
        cutoff = reference.ppf(1 - level/2)
        # The sketch style is read from rcParams when the line is created;
        # set it only for this call and restore the previous value afterwards.
        previous_sketch = plt.rcParams['path.sketch']
        plt.rcParams['path.sketch'] = (1, 100, 2)
        try:
            ax.axhline(y=cutoff, color='firebrick', linestyle='-.', linewidth=1)
        finally:
            plt.rcParams['path.sketch'] = previous_sketch
        # Line position and label both come from the computed cutoff so they
        # cannot disagree.
        ax.annotate('cutoff $t_{{\\alpha={}}} \\approx {:.2f}$'.format(level, cutoff),
                    xy=(10, cutoff), xytext=(10, 0), textcoords='offset points',
                    bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))

    draw_cutoff(0.05)
    draw_cutoff(0.01)

    if plotp:
        ax2 = ax.twinx()
        # Plot the p-values that correspond to the different sample sizes
        ax2.plot(nvalues, pvalues, color="blue")
        ax2.set_xlabel("Sample Size")
        ax2.set_ylabel('p-value', color="blue")
        ax2.tick_params(axis='y', labelcolor="blue")
        ax2.grid(which='major', axis='x', linestyle='-')
        for level in (0.05, 0.01):
            ax2.axhline(y=level, color='blue', linestyle='-.', linewidth=1)
            ax2.annotate('$\\alpha = {}$'.format(level), xy=(to_n, level), xytext=(-40, 0),
                         textcoords='offset points',
                         bbox=dict(boxstyle="round", facecolor="white", edgecolor="blue", alpha=0.8))
def one_sample_one_tailed(sample_data, mu, alpha=0.05, alternative='greater'):
    """Run a one-sample, one-tailed t-test and print the outcome.

    Parameters
    ----------
    sample_data : array-like
        Observations.
    mu : float
        Population mean under the null hypothesis.
    alpha : float, optional
        Significance level.
    alternative : {'greater', 'less'}, optional
        Direction of the alternative hypothesis (sample mean greater / less
        than ``mu``).

    Prints the t statistic, the one-tailed p-value and, when the null
    hypothesis is rejected, a message naming the test. Returns ``None``.

    Raises
    ------
    ValueError
        If ``alternative`` is neither ``'greater'`` nor ``'less'``.
    """
    if alternative not in ('greater', 'less'):
        raise ValueError("alternative must be 'greater' or 'less', got {!r}".format(alternative))

    t, p_two_sided = stats.ttest_1samp(sample_data, mu)

    # ttest_1samp reports the two-sided p-value. The one-tailed value is half
    # of it when t points in the direction of the alternative, and the
    # complement of that half otherwise.
    toward_alternative = (t > 0) if alternative == 'greater' else (t < 0)
    p = p_two_sided / 2 if toward_alternative else 1 - p_two_sided / 2

    print('t:', t)
    print('p:', p)
    if toward_alternative and p < alpha:
        if alternative == 'greater':
            print('Reject Null Hypothesis for greater-than test')
        else:
            print('Reject Null Hypothesis for less-than test')
###### TO DELETE
def build_ttest_visualization(ttest_result, alpha):
    """Draw a two-panel summary of a t-test: the test itself and its effect size.

    ``ttest_result`` is indexed with ``.loc["T-test", column]`` for the
    columns ``"dof"``, ``"T"``, ``"p-val"`` and ``"cohen-d"``. The left panel
    shows the t distribution with the two-tailed rejection zones for
    ``alpha``, the observed t and the hatched p-value tail. The right panel
    shows two unit-variance normal curves whose centres are separated by
    Cohen's d. The figure is displayed with ``plt.show()``.
    """
    # Pull the numbers out of the result table
    dof = round(ttest_result.loc["T-test","dof"])
    t = ttest_result.loc["T-test","T"]
    p = ttest_result.loc["T-test","p-val"]
    d = ttest_result.loc["T-test","cohen-d"]

    def strip_frame(axis):
        """Hide y axis, grid and every spine except the bottom one."""
        axis.get_yaxis().set_visible(False)
        axis.set_ylim(bottom=0)
        axis.grid(False)
        for side in ('top', 'right', 'left'):
            axis.spines[side].set_visible(False)

    fig = plt.figure(figsize=(14, 4))

    ### 1. t-test panel
    ax1 = plt.subplot(121)
    ax1.set_title("Result of the t-test")
    tdist = stats.t(df=dof, loc=0, scale=1)
    t_low = tdist.ppf(0.0001)
    t_high = tdist.ppf(0.9999)
    t_grid = np.linspace(t_low, t_high, 100)
    ax1.plot(t_grid, tdist.pdf(t_grid), color='black', linewidth=1)
    strip_frame(ax1)

    # Two-tailed rejection zones
    left_zone = np.linspace(t_low, tdist.ppf(alpha/2), 100)
    right_zone = np.linspace(tdist.ppf(1-alpha/2), t_high, 100)
    left_density = tdist.pdf(left_zone)
    right_density = tdist.pdf(right_zone)
    ax1.fill_between(left_zone, left_density, 0, alpha=0.3, color='red',
                     label=r'threshold $\alpha={}$'.format(alpha))
    ax1.fill_between(right_zone, right_density, 0, alpha=0.3, color='red')

    # Observed statistic
    ax1.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
    ax1.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130),
                 textcoords='offset points',
                 bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))

    # Hatched tail for the p-value, on the outer side of t
    if t >= 0:
        tail_x = np.linspace(t, t_high, 100)
    else:
        tail_x = np.linspace(t_low, t, 100)
    ax1.fill_between(tail_x, tdist.pdf(tail_x), 0, facecolor="none", edgecolor="firebrick",
                     hatch="///", linewidth=0.0, label=r'p-value $p$={:.3f}'.format(p))
    ax1.legend(loc='upper right')

    ### 2. Effect-size panel
    ax2 = plt.subplot(122)
    ax2.set_title("Effect size")

    # First curve: standard normal centred on 0
    first = stats.norm(loc=0, scale=1)
    first_x = np.linspace(first.ppf(0.0001), first.ppf(0.9999), 100)
    first_y = first.pdf(first_x)
    ax2.plot(first_x, first_y, color='black', alpha=0.3, linewidth=1)
    ax2.fill_between(first_x, first_y, 0, color='blue', alpha=0.3, label='Year 1 (theoretical)')
    ax2.axvline(x=0, color='blue', alpha=0.5, linestyle='dashed', linewidth=1)

    # Second curve: shifted by d, to the left when t is positive
    if t > 0:
        loc_d = -d
    else:
        loc_d = d
    second = stats.norm(loc=loc_d, scale=1)
    second_x = np.linspace(second.ppf(0.0001), second.ppf(0.9999), 100)
    second_y = second.pdf(second_x)
    ax2.plot(second_x, second_y, color='black', alpha=0.3, linewidth=1)
    ax2.fill_between(second_x, second_y, 0, color='green', alpha=0.3, label='Year 2 (theoretical)')
    ax2.axvline(x=loc_d, color='green', alpha=0.5, linestyle='dashed', linewidth=1)

    # Segment joining the two centres, just above the first curve's peak
    bar_y = np.max(first_y)+.02
    ax2.plot([0,loc_d], [bar_y, bar_y], color='red', linewidth=1, marker=".")
    ax2.annotate("effect size $d$={:.3f}".format(d), xy=(loc_d, bar_y), xytext=(15, -5),
                 textcoords='offset points',
                 bbox=dict(boxstyle="round", facecolor="white", edgecolor="red", alpha=0.8))

    strip_frame(ax2)
    ax2.legend(loc='upper left')

    # Display the graph
    plt.subplots_adjust(wspace=.1)
    plt.show()
# EOF

Event Timeline