-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstats.py
More file actions
101 lines (76 loc) · 3.03 KB
/
stats.py
File metadata and controls
101 lines (76 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import numpy as np
import scipy.stats as scs
def pooled_prob(N_A, N_B, X_A, X_B):
    """Returns the pooled probability for two samples

    Parameters:
        N_A, N_B: sizes of the two samples
        X_A, X_B: counts observed in the two samples (presumably the
            number of successes/conversions; confirm against callers)

    Returns:
        (X_A + X_B) / (N_A + N_B)
    """
    combined_count = X_A + X_B
    combined_size = N_A + N_B
    return combined_count / combined_size
def pooled_SE(N_A, N_B, X_A, X_B):
    """Returns the pooled standard error for two samples

    Parameters:
        N_A, N_B: sizes of the two samples
        X_A, X_B: counts observed in the two samples, forwarded to
            pooled_prob to obtain the pooled probability

    Returns:
        sqrt(p * (1 - p) * (1 / N_A + 1 / N_B)), where p is the pooled
        probability of the two samples
    """
    p_pool = pooled_prob(N_A, N_B, X_A, X_B)
    # p * (1 - p) term for the pooled probability
    spread = p_pool * (1 - p_pool)
    return np.sqrt(spread * (1 / N_A + 1 / N_B))
def confidence_interval(sample_mean=0, sample_std=1, sample_size=1,
                        sig_level=0.05):
    """Returns the confidence interval as a tuple

    Parameters:
        sample_mean: centre of the interval
        sample_std: standard deviation of the sample
        sample_size: number of observations in the sample
        sig_level: significance level passed to z_val (which is called
            with its default two-tailed setting)

    Returns:
        (left, right): sample_mean minus/plus
        z * sample_std / sqrt(sample_size)
    """
    # half-width of the interval; identical on both sides of the mean
    margin = z_val(sig_level) * sample_std / np.sqrt(sample_size)
    return (sample_mean - margin, sample_mean + margin)
def z_val(sig_level=0.05, two_tailed=True):
    """Returns the z value for a given significance level

    Parameters:
        sig_level (float): significance level
        two_tailed (bool): when True, sig_level is halved before the
            lookup; when False it is used as-is

    Returns:
        z: standard normal quantile at 1 - (tail area)
    """
    # a two-tailed test splits the significance level across both tails
    tail_area = sig_level / 2 if two_tailed else sig_level
    return scs.norm().ppf(1 - tail_area)
def ab_dist(stderr, d_hat=0, group_type='control'):
    """Returns a distribution object depending on group type

    Examples:
        ab_dist(0.02) is a normal distribution with mean 0 and standard
        deviation 0.02; ab_dist(0.02, d_hat=0.05, group_type='test') has
        the same standard deviation but mean 0.05.

    Parameters:
        stderr (float): pooled standard error of two independent samples
        d_hat (float): the mean difference between two independent samples
        group_type (string): 'control' and 'test' are supported

    Returns:
        dist (scipy.stats distribution object)

    Raises:
        ValueError: if group_type is neither 'control' nor 'test'
    """
    if group_type == 'control':
        sample_mean = 0
    elif group_type == 'test':
        sample_mean = d_hat
    else:
        # Without this branch sample_mean would be unbound and the call
        # would fail below with an unhelpful UnboundLocalError.
        raise ValueError(
            "group_type must be 'control' or 'test', got {!r}".format(
                group_type))

    # create a normal distribution which is dependent on mean and std dev
    return scs.norm(sample_mean, stderr)
def min_sample_size(bcr, mde, power=0.8, sig_level=0.05):
    """Returns the minimum sample size to set up a split test

    Arguments:
        bcr (float): probability of success for control, sometimes
            referred to as baseline conversion rate
        mde (float): minimum change in measurement between control
            group and test group if alternative hypothesis is true,
            sometimes referred to as minimum detectable effect
        power (float): probability of rejecting the null hypothesis when
            the null hypothesis is false, typically 0.8
        sig_level (float): significance level often denoted as alpha,
            typically 0.05

    Returns:
        min_N: minimum sample size (float)

    References:
        Stanford lecture on sample sizes
        http://statweb.stanford.edu/~susan/courses/s141/hopower.pdf
    """
    # z-scores from the standard normal: one for the desired power and
    # one for the two-sided significance level
    z_power = scs.norm.ppf(power)
    z_alpha = scs.norm.ppf(1 - sig_level / 2)
    z_total = z_power + z_alpha

    # mean of the control rate (bcr) and the test rate (bcr + mde)
    avg_rate = (bcr + bcr + mde) / 2

    numerator = 2 * avg_rate * (1 - avg_rate) * z_total ** 2
    return numerator / mde ** 2
def p_val(N_A, N_B, p_A, p_B):
    """Returns the binomial probability of group B's result under group
    A's rate

    Parameters:
        N_A (int): number of trials of the binomial distribution
        N_B (int): sample size used to turn p_B back into a count
        p_A (float): success probability of the binomial distribution
        p_B (float): observed rate; p_B * N_B is the count evaluated

    Returns:
        Probability mass of Binomial(N_A, p_A) at round(p_B * N_B)

    NOTE(review): this is the probability of exactly that count (PMF),
    not a tail-area p-value; kept as-is so existing callers see the same
    quantity. Confirm with callers before switching to a tail probability.
    """
    # p_B * N_B frequently lands a hair off a whole number
    # (0.07 * 100 == 7.000000000000001) and the binomial PMF is 0 at any
    # non-integer, so snap the product to the nearest whole count first.
    count_B = np.round(p_B * N_B)
    return scs.binom(N_A, p_A).pmf(count_B)