Page Menu | Home | c4science

dataanalysis.py
No One | Temporary

File Metadata

Created
Thu, May 16, 18:31

dataanalysis.py

import numpy as np
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import HBox, VBox, Label, Layout
import ipywidgets as widgets
from IPython.display import IFrame
from IPython.display import set_matplotlib_formats, display, Math, Markdown, Latex, HTML
# Ask IPython to emit inline matplotlib figures in the 'svg' format
set_matplotlib_formats('svg')
# Select the 'inline' matplotlib backend (same effect as the `%matplotlib inline` magic),
# so figures are embedded in the notebook output.
# NOTE(review): get_ipython() returns None outside an IPython session, in which case the
# call below raises AttributeError -- confirm this module is only ever imported from a notebook.
from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import matplotlib.patches as pat
import matplotlib.ticker as ticker
# Global style for plotting.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and 3.8 removed the
# old names, so use the new name when this Matplotlib provides it and fall back otherwise.
_WHITEGRID_STYLE = 'seaborn-v0_8-whitegrid'
if _WHITEGRID_STYLE not in plt.style.available:
    _WHITEGRID_STYLE = 'seaborn-whitegrid'  # Matplotlib < 3.6
plt.style.use(_WHITEGRID_STYLE)
import scipy.stats as stats
def visualize_ttest(sample_size, alpha, t):
    """Plot a t distribution with its two-tailed rejection zone and the observed t statistic.

    The curve is drawn between the 0.0001 and 0.9999 quantiles of the distribution.
    The figure is shown with ``plt.show()``; nothing is returned.

    Parameters
    ----------
    sample_size : number
        Used unchanged as the degrees of freedom (``df``) of the t distribution.
        Must be positive.
    alpha : float
        Significance level, strictly between 0 and 1. Each tail's rejection zone
        is bounded by the ``alpha / 2`` (resp. ``1 - alpha / 2``) quantile.
    t : float
        Observed t statistic, drawn as a dashed vertical line with a label.

    Raises
    ------
    ValueError
        If ``sample_size`` is not positive or ``alpha`` is not in (0, 1).
    """
    # Validate before plotting: out-of-range values make the quantiles inf/NaN and
    # would otherwise produce a silently wrong (or empty) figure.
    if not sample_size > 0:
        raise ValueError("sample_size must be positive, got {!r}".format(sample_size))
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1, got {!r}".format(alpha))

    # Create the t-test visualization
    _, ax = plt.subplots(figsize=(12, 4))
    ax.set_title("Probability distribution of all possible sample means if $H_0$ is true")

    # Let's plot the T distribution for this sample size
    # NOTE(review): sample_size is passed directly as df; a one-sample t-test would
    # normally use sample_size - 1 -- confirm what callers actually pass here.
    tdist = stats.t(df=sample_size, loc=0, scale=1)
    x_min, x_max = tdist.ppf(0.0001), tdist.ppf(0.9999)  # plotted x range, reused below
    x = np.linspace(x_min, x_max, 100)
    ax.plot(x, tdist.pdf(x), color='black', linewidth=1)

    # Polish the look of the graph
    ax.get_yaxis().set_visible(False)  # hide the y axis
    ax.set_ylim(bottom=0)
    ax.grid(False)  # hide the grid
    for side in ('top', 'right', 'left'):  # hide the frame except bottom line
        ax.spines[side].set_visible(False)

    # Plot the rejection zone two tailed
    x_zone_1 = np.linspace(x_min, tdist.ppf(alpha / 2), 100)
    x_zone_2 = np.linspace(tdist.ppf(1 - alpha / 2), x_max, 100)
    # Only the first zone carries a label so the legend shows a single entry
    ax.fill_between(x_zone_1, tdist.pdf(x_zone_1), 0, alpha=0.3, color='red',
                    label=r'rejection of $H_0$ with $\alpha={}$'.format(alpha))
    ax.fill_between(x_zone_2, tdist.pdf(x_zone_2), 0, alpha=0.3, color='red')

    # Plot the t-test stat
    ax.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
    ax.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130),
                textcoords='offset points',
                bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))

    # Add a legend
    ax.legend()

    # Display the graph
    plt.show()
def visualize_ttest_pvalue(sample_size, alpha, t, p):
    """Plot a t distribution with rejection zone, observed t statistic and a hatched p-value tail.

    The curve is drawn between the 0.0001 and 0.9999 quantiles of the distribution.
    The hatched area runs from ``t`` to the end of the plotted range on the side of
    ``t`` (right when ``t >= 0``, left otherwise). The figure is shown with
    ``plt.show()``; nothing is returned.

    Parameters
    ----------
    sample_size : number
        Used unchanged as the degrees of freedom (``df``) of the t distribution.
        Must be positive.
    alpha : float
        Significance level, strictly between 0 and 1. Each tail's rejection zone
        is bounded by the ``alpha / 2`` (resp. ``1 - alpha / 2``) quantile.
    t : float
        Observed t statistic, drawn as a dashed vertical line with a label.
    p : float
        p-value; only used in the legend label, it does not influence the shaded area.

    Raises
    ------
    ValueError
        If ``sample_size`` is not positive or ``alpha`` is not in (0, 1).
    """
    # Validate before plotting: out-of-range values make the quantiles inf/NaN and
    # would otherwise produce a silently wrong (or empty) figure.
    if not sample_size > 0:
        raise ValueError("sample_size must be positive, got {!r}".format(sample_size))
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1, got {!r}".format(alpha))

    # Create the t-test visualization
    _, ax = plt.subplots(figsize=(12, 4))
    ax.set_title("Probability distribution of all possible sample means if $H_0$ is true")

    # Let's plot the T distribution for this sample size
    # NOTE(review): sample_size is passed directly as df; a one-sample t-test would
    # normally use sample_size - 1 -- confirm what callers actually pass here.
    tdist = stats.t(df=sample_size, loc=0, scale=1)
    x_min, x_max = tdist.ppf(0.0001), tdist.ppf(0.9999)  # plotted x range, reused below
    x = np.linspace(x_min, x_max, 100)
    ax.plot(x, tdist.pdf(x), color='black', linewidth=1)

    # Polish the look of the graph
    ax.get_yaxis().set_visible(False)  # hide the y axis
    ax.set_ylim(bottom=0)
    ax.grid(False)  # hide the grid
    for side in ('top', 'right', 'left'):  # hide the frame except bottom line
        ax.spines[side].set_visible(False)

    # Plot the rejection zone two tailed
    x_zone_1 = np.linspace(x_min, tdist.ppf(alpha / 2), 100)
    x_zone_2 = np.linspace(tdist.ppf(1 - alpha / 2), x_max, 100)
    # Only the first zone carries a label so the legend shows a single entry
    ax.fill_between(x_zone_1, tdist.pdf(x_zone_1), 0, alpha=0.3, color='red',
                    label=r'rejection of $H_0$ with $\alpha={}$'.format(alpha))
    ax.fill_between(x_zone_2, tdist.pdf(x_zone_2), 0, alpha=0.3, color='red')

    # Plot the t-test stats
    ax.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
    ax.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130),
                textcoords='offset points',
                bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))

    # Plot the p-value: hatch the tail beyond t, on the side where t lies
    # NOTE(review): only one tail is hatched; if p is a two-sided p-value the hatched
    # area corresponds to p / 2 -- confirm the intended reading.
    if t >= 0:
        x_t = np.linspace(t, x_max, 100)
    else:
        x_t = np.linspace(x_min, t, 100)
    ax.fill_between(x_t, tdist.pdf(x_t), 0, facecolor="none", edgecolor="firebrick",
                    hatch="///", linewidth=0.0, label=r'p-value $p$={:.3f}'.format(p))

    # Add a legend
    ax.legend()

    # Display the graph
    plt.show()
###### TO DELETE
def build_ttest_visualization(ttest_result, alpha):
    """Show a two-panel figure: the t-test result (left) and the effect size (right).

    Left panel: t distribution with the two-tailed rejection zone for ``alpha``, the
    observed t statistic, and a hatched tail labelled with the p-value.
    Right panel: two unit-variance normal curves, one centred at 0 and one shifted by
    Cohen's d, with the distance between their centres annotated.
    The figure is shown with ``plt.show()``; nothing is returned.

    Parameters
    ----------
    ttest_result : table-like
        Must support ``ttest_result.loc["T-test", <column>]`` for the columns
        ``"dof"``, ``"T"``, ``"p-val"`` and ``"cohen-d"`` (presumably the DataFrame
        returned by a t-test routine such as pingouin's -- confirm against callers).
    alpha : float
        Significance level, strictly between 0 and 1.

    Raises
    ------
    ValueError
        If ``alpha`` is not in (0, 1) or the rounded degrees of freedom are not positive.
    """
    # Extract information from the result of the t-test
    n = round(ttest_result.loc["T-test", "dof"])  # degrees of freedom, rounded to an integer
    t = ttest_result.loc["T-test", "T"]
    p = ttest_result.loc["T-test", "p-val"]
    d = ttest_result.loc["T-test", "cohen-d"]

    # Validate before plotting: out-of-range values make the quantiles inf/NaN and
    # would otherwise produce a silently wrong (or empty) figure.
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1, got {!r}".format(alpha))
    if not n > 0:
        raise ValueError("degrees of freedom must be positive, got {!r}".format(n))

    # Create the figure
    plt.figure(figsize=(14, 4))

    ### 1. Create the t-test visualization
    ax1 = plt.subplot(121)
    ax1.set_title("Result of the t-test")

    # Let's plot the T distribution for this sample size
    tdist = stats.t(df=n, loc=0, scale=1)
    x_min, x_max = tdist.ppf(0.0001), tdist.ppf(0.9999)  # plotted x range, reused below
    x = np.linspace(x_min, x_max, 100)
    ax1.plot(x, tdist.pdf(x), color='black', linewidth=1)

    # Polish the look of the graph
    ax1.get_yaxis().set_visible(False)  # hide the y axis
    ax1.set_ylim(bottom=0)
    ax1.grid(False)  # hide the grid
    for side in ('top', 'right', 'left'):  # hide the frame except bottom line
        ax1.spines[side].set_visible(False)

    # Plot the rejection zone two tailed
    x_zone_1 = np.linspace(x_min, tdist.ppf(alpha / 2), 100)
    x_zone_2 = np.linspace(tdist.ppf(1 - alpha / 2), x_max, 100)
    # Only the first zone carries a label so the legend shows a single entry
    ax1.fill_between(x_zone_1, tdist.pdf(x_zone_1), 0, alpha=0.3, color='red',
                     label=r'threshold $\alpha={}$'.format(alpha))
    ax1.fill_between(x_zone_2, tdist.pdf(x_zone_2), 0, alpha=0.3, color='red')

    # Plot the t-test stats
    ax1.axvline(x=t, color='firebrick', linestyle='dashed', linewidth=1)
    ax1.annotate('t-test $t$={:.3f}'.format(t), xy=(t, 0), xytext=(-10, 130),
                 textcoords='offset points',
                 bbox=dict(boxstyle="round", facecolor="white", edgecolor="firebrick", alpha=0.8))

    # Plot the p-value: hatch the tail beyond t, on the side where t lies
    if t >= 0:
        x_t = np.linspace(t, x_max, 100)
    else:
        x_t = np.linspace(x_min, t, 100)
    ax1.fill_between(x_t, tdist.pdf(x_t), 0, facecolor="none", edgecolor="firebrick",
                     hatch="///", linewidth=0.0, label=r'p-value $p$={:.3f}'.format(p))

    # Add a legend
    ax1.legend(loc='upper right')

    ### 2. Create the effect size visualization
    ax2 = plt.subplot(122)
    ax2.set_title("Effect size")

    # Plot the theoretical distribution of first sample
    norm = stats.norm(loc=0, scale=1)
    x = np.linspace(norm.ppf(0.0001), norm.ppf(0.9999), 100)
    y = norm.pdf(x)
    ax2.plot(x, y, color='black', alpha=0.3, linewidth=1)
    ax2.fill_between(x, y, 0, color='blue', alpha=0.3, label='Year 1 (theoretical)')
    ax2.axvline(x=0, color='blue', alpha=0.5, linestyle='dashed', linewidth=1)

    # Plot the theoretical distribution of second sample
    # (if t > 0 means 2 < 1 so we plot the second sample on the left)
    # NOTE(review): this placement assumes d is non-negative -- confirm for the caller's data.
    loc_d = -d if t > 0 else d
    norm_d = stats.norm(loc=loc_d, scale=1)
    x_d = np.linspace(norm_d.ppf(0.0001), norm_d.ppf(0.9999), 100)
    y_d = norm_d.pdf(x_d)
    ax2.plot(x_d, y_d, color='black', alpha=0.3, linewidth=1)
    ax2.fill_between(x_d, y_d, 0, color='green', alpha=0.3, label='Year 2 (theoretical)')
    ax2.axvline(x=loc_d, color='green', alpha=0.5, linestyle='dashed', linewidth=1)

    # Display the value of Cohen's d as a segment drawn slightly above the first curve's peak
    max_y = np.max(y) + .02
    ax2.plot([0, loc_d], [max_y, max_y], color='red', linewidth=1, marker=".")
    ax2.annotate("effect size $d$={:.3f}".format(d), xy=(loc_d, max_y), xytext=(15, -5),
                 textcoords='offset points',
                 bbox=dict(boxstyle="round", facecolor="white", edgecolor="red", alpha=0.8))

    # Polish the look of the graph
    ax2.get_yaxis().set_visible(False)  # hide the y axis
    ax2.set_ylim(bottom=0)
    ax2.grid(False)  # hide the grid
    for side in ('top', 'right', 'left'):  # hide the frame except bottom line
        ax2.spines[side].set_visible(False)

    # Add a legend
    ax2.legend(loc='upper left')

    # Display the graph
    plt.subplots_adjust(wspace=.1)
    plt.show()
# EOF

Event Timeline