forked from AllenDowney/ThinkStats2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchap09soln.py
More file actions
180 lines (133 loc) · 5.34 KB
/
chap09soln.py
File metadata and controls
180 lines (133 loc) · 5.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
from __future__ import print_function, division
import first
import hypothesis
import scatter
import thinkstats2
import numpy as np
"""This file contains a solution to exercises in Think Stats:
As sample size increases, the power of a hypothesis test increases,
which means it is more likely to be positive if the effect is real.
Conversely, as sample size decreases, the test is less likely to
be positive even if the effect is real.
To investigate this behavior, run the tests in this chapter with
different subsets of the NSFG data. You can use thinkstats2.SampleRows
to select a random subset of the rows in a DataFrame.
What happens to the p-values of these tests as sample size decreases?
What is the smallest sample size that yields a positive test?
My results:
test1: difference in mean pregnancy length
test2: difference in mean birth weight
test3: correlation of mother's age and birth weight
test4: chi-square test of pregnancy length
n test1 test2 test3 test4
9148 0.16 0.00 0.00 0.00
4574 0.10 0.01 0.00 0.00
2287 0.25 0.06 0.00 0.00
1143 0.24 0.03 0.39 0.03
571 0.81 0.00 0.04 0.04
285 0.57 0.41 0.48 0.83
142 0.45 0.08 0.60 0.04
Conclusion: As expected, tests that are positive with large sample
sizes become negative as we take away data. But the pattern is
erratic, with some positive tests even at small sample sizes.
In Section~\ref{testing}, we simulated the null hypothesis by
permutation; that is, we treated the observed values as if they
represented the entire population, and randomly assigned the
members of the population to the two groups.
An alternative is to use the sample to estimate the distribution for
the population, then draw a random sample from that distribution.
This process is called resampling. There are several ways to
implement the resampling, but one of the simplest is to draw a sample,
with replacement, from the observed values, as in Section~\ref{power}.
Write a class named DiffMeansResample that inherits from
DiffMeansPermute and overrides RunModel to implement
resampling, rather than permutation.
Use this model to test the differences in pregnancy length and
birth weight. How much does the model affect the results?
Results:
means permute preglength
p-value = 0.1674
actual = 0.0780372667775
ts max = 0.226752436104
means permute birthweight
p-value = 0.0
actual = 0.124761184535
ts max = 0.112243501197
Conclusions: Using resampling instead of permutation has very
little effect on the results.
The two models are based on slightly different assumptions, and in
this example there is no compelling reason to choose one or the other.
But in general p-values depend on the choice of the null hypothesis;
different models can yield very different results.
"""
class DiffMeansResample(hypothesis.DiffMeansPermute):
    """Difference-in-means test whose null model resamples the pool."""

    def RunModel(self):
        """Simulate one dataset under the null hypothesis.

        Draws two groups, of sizes self.n and self.m, from self.pool by
        sampling with replacement.  (self.pool, self.n and self.m are
        not set in this class; presumably the parent class provides
        them -- confirm against hypothesis.DiffMeansPermute.)

        returns: tuple of two simulated groups
        """
        # Draw in (n, m) order so the random stream is consumed exactly
        # as two consecutive np.random.choice calls would consume it.
        return tuple(
            np.random.choice(self.pool, size, replace=True)
            for size in (self.n, self.m)
        )
def _ReportResampleTest(title, data, iters):
    """Runs one DiffMeansResample test and prints its summary.

    title: label printed above the results
    data: pair of value arrays (group1, group2)
    iters: number of simulated datasets used to compute the p-value
    """
    ht = DiffMeansResample(data)
    p_value = ht.PValue(iters=iters)
    print('\n%s' % title)
    print('p-value =', p_value)
    print('actual =', ht.actual)
    print('ts max =', ht.MaxTestStat())


def RunResampleTest(firsts, others, iters=10000):
    """Tests differences in means by resampling.

    Applies DiffMeansResample to pregnancy length (prglngth) and to
    birth weight (totalwgt_lb, with missing values dropped) and prints
    the p-value, ht.actual and ht.MaxTestStat() for each variable.

    firsts: DataFrame
    others: DataFrame
    iters: number of simulated datasets per test (default 10000)
    """
    # The printed labels still say "permute"; they are kept unchanged so
    # the output lines up with the results recorded in the module notes.
    _ReportResampleTest(
        'means permute preglength',
        (firsts.prglngth.values, others.prglngth.values),
        iters)

    # Birth weight previously went through hypothesis.DiffMeansPermute,
    # so the resampling model was never applied to it.  Both variables
    # now use DiffMeansResample.
    _ReportResampleTest(
        'means permute birthweight',
        (firsts.totalwgt_lb.dropna().values,
         others.totalwgt_lb.dropna().values),
        iters)
def RunTests(live, iters=1000):
    """Runs the four Chapter 9 tests on the given rows and prints p-values.

    Prints one tab-separated line: the number of rows followed by the
    p-values of the four tests, in the order they are run below.

    live: DataFrame
    iters: how many iterations to run
    """
    def p_value_of(test_class, data):
        # Build the test and compute its p-value right away, so the
        # tests execute strictly in the order they are written.
        return test_class(data).PValue(iters=iters)

    first_babies = live[live.birthord == 1]
    other_babies = live[live.birthord != 1]

    # test1: difference in mean pregnancy length
    p_length = p_value_of(
        hypothesis.DiffMeansPermute,
        (first_babies.prglngth.values, other_babies.prglngth.values))

    # test2: difference in mean birth weight (missing weights dropped)
    p_weight = p_value_of(
        hypothesis.DiffMeansPermute,
        (first_babies.totalwgt_lb.dropna().values,
         other_babies.totalwgt_lb.dropna().values))

    # test3: correlation of mother's age and birth weight, using only
    # rows where both values are present
    complete = live.dropna(subset=['agepreg', 'totalwgt_lb'])
    p_corr = p_value_of(
        hypothesis.CorrelationPermute,
        (complete.agepreg.values, complete.totalwgt_lb.values))

    # test4: chi-squared test of pregnancy length
    p_chi2 = p_value_of(
        hypothesis.PregLengthTest,
        (first_babies.prglngth.values, other_babies.prglngth.values))

    row = (len(live), p_length, p_weight, p_corr, p_chi2)
    print('%d\t%0.2f\t%0.2f\t%0.2f\t%0.2f' % row)
def main():
    """Runs the resampling test, then the Chapter 9 tests on shrinking samples.

    Seeds the random number generator, loads the frames from
    first.MakeFrames(), runs RunResampleTest on the firsts/others
    frames, and then calls RunTests on seven samples of the live frame,
    halving the requested sample size (integer division) each time.
    """
    thinkstats2.RandomSeed(18)
    live, firsts, others = first.MakeFrames()
    RunResampleTest(firsts, others)

    total = len(live)
    for halvings in range(7):
        # total // 2**k equals halving k times with floor division.
        size = total // 2 ** halvings
        RunTests(thinkstats2.SampleRows(live, size))


if __name__ == '__main__':
    main()