1"""
2Frequentist Tests
4Note on estimating the population variance: We often use n-1 instead of n when estimating the
5population variance (Bessel's correction), where n is the number of samples. This method corrects
6the bias in the estimation of the population variance. It also partially corrects the bias in the
7estimation of the population standard deviation. However, the correction often increases the mean
8squared error in these estimations. When n is large this correction is small.
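
For illustration (hypothetical values), numpy exposes this correction through the ddof argument,
where ddof=0 divides by n and ddof=1 divides by n - 1:

>>> import numpy as np
>>> x = np.asarray([1.0, 2.0, 3.0, 4.0])
>>> v_biased = np.var(x, ddof=0)      # 1.25, divides by n
>>> v_corrected = np.var(x, ddof=1)   # ~1.667, divides by n - 1 (Bessel's correction)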

TODO: add continuity corrections to unpooled z tests
"""

from typing import Tuple
import logging

import numpy as np
from numpy import ndarray
from scipy.stats import norm, t
from scipy import stats

# set logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

__all__ = [
    'find_p_value', 'find_test_statistic', 'find_confidence_interval',
    # 'mean', 'variance', 'standard_deviation',
    'one_samp_z_prop', 'two_samp_z_prop',
    'one_samp_z', 'two_samp_z',
    'one_samp_t', 'two_samp_t',
]


################################################################################
# auxiliary functions


def find_p_value(test_statistic: float, df: float = np.inf, tails: bool = True) -> float:
    """
    A convenience function for finding p-values for t tests and z tests.

    Notes
    -----
    * sf is 1 - cdf

    Parameters
    ----------
    test_statistic: float
        The t or z test statistic.
    df: float
        The degrees of freedom. If infinity (np.inf), this is assumed to be a z test. Otherwise it
        is assumed to be a t test.
    tails: bool
        An indicator for two tailed tests. If True, this assumes a two tailed test. If False, this
        assumes a one tailed test.

    Returns
    -------
    float
        The p-value corresponding to the test statistic.
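
    Examples
    --------
    A minimal illustrative example with hypothetical values; for a two-sided z test a statistic of
    roughly 1.96 corresponds to a p-value of roughly 0.05.

    >>> p_two_sided = find_p_value(1.96)               # approximately 0.05
    >>> p_one_sided = find_p_value(1.96, tails=False)  # approximately 0.025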
60 """
62 tails = 2 if tails else 1
63 if df < np.inf:
64 return stats.t.sf(
65 np.abs(test_statistic), loc=0.0, scale=1.0, df=df,
66 ) * tails
67 return stats.norm.sf(
68 np.abs(test_statistic),
69 loc=0.0, scale=1.0,
70 ) * tails


def find_test_statistic(p_value: float, df: float = np.inf, tails: bool = True) -> float:
    """
    A convenience function for recovering t and z test statistics from p-values.

    Parameters
    ----------
    p_value: float
        The p-value of interest.
    df: float
        The degrees of freedom. If infinity (np.inf), this is assumed to be a z test. Otherwise it
        is assumed to be a t test. The degrees of freedom is usually the total number of samples
        minus one for the t test.
    tails: bool
        An indicator for two tailed tests. If True, this assumes a two tailed test. If False, this
        assumes a one tailed test.

    Returns
    -------
    float
        The corresponding test statistic.
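
    Examples
    --------
    A minimal illustrative example with hypothetical values, recovering the familiar two-sided z
    critical value for p = 0.05.

    >>> z_crit = find_test_statistic(0.05)         # approximately 1.96
    >>> t_crit = find_test_statistic(0.05, df=10)  # wider than 1.96 under the t distribution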
93 """
95 if p_value <= 0.0 or p_value >= 1.0:
96 raise ValueError("Input p must be a float between 0 and 1 non-inclusive.")
98 p = 1.0 - p_value
99 p = (1.0 + p) / 2.0 if tails is True else p
100 if df == np.inf:
101 return norm(loc=0.0, scale=1.0).ppf(p)
102 return t(loc=0.0, scale=1.0, df=df).ppf(p)


def find_confidence_interval(se: float, df: float = np.inf, alpha: float = 0.05,
                             tails: bool = True) -> float:
    """
    A convenience function for finding the confidence interval based on the standard error.

    Parameters
    ----------
    se: float
        The standard error of the measurement (estimate).
    df: float
        The degrees of freedom. If infinity (np.inf), this is assumed to be a z test. Otherwise it
        is assumed to be a t test.
    alpha: float
        The probability of making a type I error. A 95% confidence interval has alpha = 5% or 0.05.
    tails: bool
        An indicator for two tailed tests. If True, this assumes a two tailed test. If False, this
        assumes a one tailed test.

    Returns
    -------
    float
        The half-width (margin of error) of the confidence interval, in absolute units.
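
    Examples
    --------
    A minimal illustrative example with hypothetical values: the margin of error for an estimate
    with a standard error of 2.0 at the usual 5% level.

    >>> moe_z = find_confidence_interval(se=2.0, alpha=0.05)         # roughly 2.0 * 1.96 = 3.92
    >>> moe_t = find_confidence_interval(se=2.0, df=20, alpha=0.05)  # slightly wider under the t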
127 """
128 tails = 2 if tails else 1
129 confidence = 1.0 - alpha
130 q = (1.0 + confidence) / tails
131 if df < np.inf:
132 return se * stats.t.ppf(
133 q=q, loc=0.0, scale=1.0, df=df,
134 )
135 return se * stats.norm.ppf(
136 q=q, loc=0.0, scale=1.0,
137 )


################################################################################
# educational functions (not for production use)


def mean(arr: ndarray) -> float:
    """
    An example of how the mean is calculated. This function is for educational purposes only.
    Please use np.mean(arr) instead.

    Parameters
    ----------
    arr: ndarray
        An array containing the data to calculate the mean of.

    Returns
    -------
    float
        The mean (average).
    """
    logger.warning("Please use the mean function in the numpy project instead.")
    return np.sum(arr) / arr.shape[0]


def variance(arr: ndarray, ddof: int = 0) -> float:
    """
    An example of how variance is calculated. This function is for educational purposes only.
    Please use np.var(arr, ddof=ddof) instead.

    Parameters
    ----------
    arr: ndarray
        An array containing the data to calculate the variance of.
    ddof: int
        The delta degrees of freedom. The divisor used is n - ddof, so ddof=1 applies Bessel's
        correction.

    Returns
    -------
    float
        The variance.
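
    Examples
    --------
    A minimal illustrative example with hypothetical values, mirroring np.var:

    >>> data = np.asarray([1.0, 2.0, 3.0, 4.0])
    >>> v_biased = variance(data, ddof=0)     # 1.25, same as np.var(data)
    >>> v_corrected = variance(data, ddof=1)  # ~1.667, same as np.var(data, ddof=1)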
179 """
180 logger.warning("Please use the stddev function in the numpy project instead.")
181 assert ddof >= 0, "Degrees freedom must be greater than or equal to 0"
182 # Number of observations
183 n = arr.shape[0]
184 assert ddof < n, "Degrees freedom must be less than total observations"
185 # Mean of the data
186 mu = np.sum(arr) / n
187 # Square deviations
188 deviations = (arr - mu)**2.0
189 # Variance
190 return np.sum(deviations) / (n - ddof)


def standard_deviation(arr: ndarray, ddof: int = 0) -> float:
    """
    An example of how standard deviation is calculated. This function is for educational purposes
    only. Please use np.std(arr, ddof=ddof) instead.

    Parameters
    ----------
    arr: ndarray
        An array containing the data to calculate the standard deviation of.
    ddof: int
        The delta degrees of freedom. The divisor used in the variance is n - ddof.

    Returns
    -------
    float
        The standard deviation.
    """
    logger.warning("Please use the std function in the numpy project instead.")
    return np.sqrt(variance(arr, ddof=ddof))


################################################################################
# normal approximation (z) proportions tests


def _one_samp_z_prop(n: int, successes: int, null_h: float = 0.5) -> Tuple[float, float]:
    """
    Function for a one sample z test of proportions.

    Parameters
    ----------
    n: int
        The number of samples (observations).
    successes: int
        The number of events.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
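
    Examples
    --------
    A minimal illustrative example with hypothetical values: 62 successes out of 100 trials tested
    against a null proportion of 0.5.

    >>> z, se = _one_samp_z_prop(n=100, successes=62, null_h=0.5)
    >>> p = find_p_value(z)  # two-sided p-value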
236 """
237 assert successes <= n, "Input successes must be less than or equal to n."
238 p_hat = successes / n
239 se = np.sqrt(p_hat * (1.0 - p_hat) / (n - 1.0))
240 z = (p_hat - null_h) / se
241 return z, se


def one_samp_z_prop(sample: ndarray, null_h: float = 0.5) -> Tuple[float, float]:
    """
    Function for a one sample z test of proportions.

    Parameters
    ----------
    sample: ndarray
        An array of samples (observations), coded as 0/1 outcomes.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
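
    Examples
    --------
    A minimal illustrative example with hypothetical values, using a small array of 0/1 outcomes:

    >>> outcomes = np.asarray([1, 0, 1, 1, 0, 1, 1, 0, 1, 1])
    >>> z, se = one_samp_z_prop(outcomes, null_h=0.5)
    >>> p = find_p_value(z)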
261 """
262 n = sample.shape[0]
263 successes = sample.sum()
264 return _one_samp_z_prop(n=n, successes=successes, null_h=null_h)


def paired_z_prop(sample1: ndarray, sample2: ndarray, null_h: float = 0.5) -> Tuple[float, float]:
    """
    Function for a paired z test of proportions. The math is the same as for a one sample z test
    of proportions applied to the element-wise differences.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample. Must be the same dimensions
        as sample1.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    return one_samp_z_prop(sample=sample1 - sample2, null_h=null_h)


def _two_samp_z_prop(n1: int, n2: int, successes1: int, successes2: int, null_h: float = 0.0,
                     pooled: bool = False) -> Tuple[float, float]:
    """
    Function for a two sample z test of proportions.

    Parameters
    ----------
    n1: int
        The number of data points (observations) in sample one.
    n2: int
        The number of data points (observations) in sample two.
    successes1: int
        The number of events in sample one.
    successes2: int
        The number of events in sample two.
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to use the assumption that the sample variances are equal.
        pooled=True assumes that the variances are equal. It is common to use the pooled
        assumption given that the unpooled assumption yields overconfident estimates in practice
        (barring the appropriate corrections).

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    assert successes1 <= n1, "Input successes1 must be less than or equal to n1."
    assert successes2 <= n2, "Input successes2 must be less than or equal to n2."
    p1 = successes1 / n1
    p2 = successes2 / n2
    if pooled:
        p = (successes1 + successes2) / (n1 + n2)
        se = np.sqrt(p * (1.0 - p) * (1.0 / n1 + 1.0 / n2))
    else:
        se = np.sqrt(
            p1 * (1.0 - p1) / n1 +
            p2 * (1.0 - p2) / n2
        )
    z = (p1 - p2 - null_h) / se
    return z, se


def two_samp_z_prop(sample1: ndarray, sample2: ndarray, null_h: float = 0.0, pooled: bool = False) \
        -> Tuple[float, float]:
    """
    Function for a two sample z test of proportions.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample.
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to use the assumption that the sample variances are equal.
        pooled=True assumes that the variances are equal. It is common to use the pooled
        assumption given that the unpooled assumption yields overconfident estimates in practice
        (barring the appropriate corrections).

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
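
    Examples
    --------
    A minimal illustrative example with hypothetical values, comparing two arrays of 0/1 outcomes:

    >>> a = np.asarray([1, 0, 1, 1, 0, 1, 1, 0])
    >>> b = np.asarray([0, 0, 1, 0, 0, 1, 1, 0])
    >>> z, se = two_samp_z_prop(a, b, null_h=0.0, pooled=True)
    >>> p = find_p_value(z)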
362 """
363 n1 = sample1.shape[0]
364 n2 = sample2.shape[0]
365 successes1 = sample1.sum()
366 successes2 = sample2.sum()
367 return _two_samp_z_prop(n1, n2, successes1, successes2, null_h=null_h, pooled=pooled)


################################################################################
# normal (z) tests


def _one_samp_z(n: int, mu: float, sigma: float, null_h: float = 0.0) -> Tuple[float, float]:
    """
    Function for a one sample z test.

    Parameters
    ----------
    n: int
        The number of samples (observations).
    mu: float
        The mean of the sample data.
    sigma: float
        The standard deviation of the sample data.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
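
    Examples
    --------
    A minimal illustrative example with hypothetical summary statistics:

    >>> z, se = _one_samp_z(n=50, mu=1.2, sigma=3.0, null_h=0.0)
    >>> ci = find_confidence_interval(se, alpha=0.05)  # 95% margin of error around the mean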
394 """
395 se = sigma / np.sqrt(n)
396 z = (mu - null_h) / se
397 return z, se


def one_samp_z(sample: ndarray, null_h: float = 0.0) -> Tuple[float, float]:
    """
    Function for a one sample z test.

    Parameters
    ----------
    sample: ndarray
        An array of samples (observations).
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
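
    Examples
    --------
    A minimal illustrative example with hypothetical data:

    >>> rng = np.random.default_rng(0)
    >>> data = rng.normal(loc=0.5, scale=1.0, size=100)
    >>> z, se = one_samp_z(data, null_h=0.0)
    >>> p = find_p_value(z)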
417 """
418 n = sample.shape[0]
419 mu = sample.mean()
420 sigma = sample.std(ddof=1)
421 return _one_samp_z(n=n, mu=mu, sigma=sigma, null_h=null_h)


def paired_z(sample1: ndarray, sample2: ndarray, null_h: float = 0.0) -> Tuple[float, float]:
    """
    Function for a paired z test. The math is the same as for a one sample z test applied to the
    element-wise differences.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample. Must be the same dimensions
        as sample1.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    return one_samp_z(sample=sample1 - sample2, null_h=null_h)


def _two_samp_z(n1: int, n2: int, mu1: float, mu2: float, sigma1: float, sigma2: float,
                null_h: float = 0.0, pooled: bool = False) -> Tuple[float, float]:
    """
    Function for a two sample z test.

    Parameters
    ----------
    n1: int
        The sample size for the first sample.
    n2: int
        The sample size for the second sample.
    mu1: float
        The mean of the first sample.
    mu2: float
        The mean of the second sample.
    sigma1: float
        The standard deviation of the first sample.
    sigma2: float
        The standard deviation of the second sample.
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to use the assumption that the sample variances are equal.
        pooled=True assumes that the variances are equal. It is common to use the pooled
        assumption given that the unpooled assumption yields overconfident estimates in practice
        (barring the appropriate corrections).

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    if pooled:
        se = np.sqrt(
            (n1 * sigma1 ** 2.0 + n2 * sigma2 ** 2.0) / (n1 + n2 - 2) * (1.0 / n1 + 1.0 / n2)
        )
    else:
        se = np.sqrt(sigma1**2.0 / n1 + sigma2**2.0 / n2)
    z = (mu1 - mu2 - null_h) / se
    return z, se


def two_samp_z(sample1: ndarray, sample2: ndarray, null_h: float = 0.0, pooled: bool = False) -> \
        Tuple[float, float]:
    """
    Function for a two sample z test.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample.
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to use the assumption that the sample variances are equal.
        pooled=True assumes that the variances are equal. It is common to use the pooled
        assumption given that the unpooled assumption yields overconfident estimates in practice
        (barring the appropriate corrections).

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
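
    Examples
    --------
    A minimal illustrative example with hypothetical data:

    >>> rng = np.random.default_rng(1)
    >>> group_a = rng.normal(loc=0.0, scale=1.0, size=200)
    >>> group_b = rng.normal(loc=0.2, scale=1.0, size=200)
    >>> z, se = two_samp_z(group_a, group_b, null_h=0.0, pooled=True)
    >>> p = find_p_value(z)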
517 """
518 n1 = sample1.shape[0]
519 n2 = sample2.shape[0]
520 mu1 = sample1.mean()
521 mu2 = sample2.mean()
522 sigma1 = sample1.std()
523 sigma2 = sample2.std()
524 return _two_samp_z(n1=n1, n2=n2, mu1=mu1, mu2=mu2, sigma1=sigma1, sigma2=sigma2, null_h=null_h,
525 pooled=pooled)


################################################################################
# student t tests


def _one_samp_t(n: int, mu: float, sigma: float, null_h: float = 0.0) -> Tuple[float, float, float]:
    """
    Function for a one sample t test.

    Parameters
    ----------
    n: int
        The number of samples (observations).
    mu: float
        The mean of the sample data.
    sigma: float
        The standard deviation of the sample data.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom
    """
    se = sigma / np.sqrt(n)
    t_stat = (mu - null_h) / se
    df = n - 1.0
    return t_stat, se, df


def one_samp_t(sample: ndarray, null_h: float = 0.0) -> Tuple[float, float, float]:
    """
    Function for a one sample t test.

    Parameters
    ----------
    sample: ndarray
        An array of samples (observations).
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom
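
    Examples
    --------
    A minimal illustrative example with hypothetical data:

    >>> data = np.asarray([2.1, 1.8, 2.4, 1.9, 2.2])
    >>> t_stat, se, df = one_samp_t(data, null_h=2.0)
    >>> p = find_p_value(t_stat, df=df)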
578 """
579 n = sample.shape[0]
580 mu = sample.mean()
581 sigma = sample.std(ddof=1)
582 return _one_samp_t(n=n, mu=mu, sigma=sigma, null_h=null_h)


def paired_t(sample1: ndarray, sample2: ndarray, null_h: float = 0.0) -> Tuple[float, float, float]:
    """
    Function for a paired t test. The math is the same as for a one sample t test applied to the
    element-wise differences.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample. Must be the same dimensions
        as sample1.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom
    """
    return one_samp_t(sample=sample1 - sample2, null_h=null_h)


def _two_samp_t(n1: int, n2: int, mu1: float, mu2: float, sigma1: float, sigma2: float,
                null_h: float = 0.0, pooled: bool = False) -> Tuple[float, float, float]:
    """
    A simple function for running two sample student t tests.

    The standard deviations should be calculated with one delta degree of freedom, for example
    np.std(arr, ddof=1).

    Parameters
    ----------
    n1: int
        The sample size for the first sample.
    n2: int
        The sample size for the second sample.
    mu1: float
        The mean of the first sample.
    mu2: float
        The mean of the second sample.
    sigma1: float
        The standard deviation of the first sample.
    sigma2: float
        The standard deviation of the second sample.
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to use the assumption that the sample variances are equal.
        pooled=True assumes that the variances are equal. The unpooled t test is sometimes
        called Welch's t test.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom

    Examples
    --------
    >>> t_stat, se, df = _two_samp_t(
    ...     n1=13, n2=10,
    ...     mu1=1.1, mu2=1.0,
    ...     sigma1=3.0, sigma2=2.0,
    ...     null_h=0.0, pooled=False,
    ... )

    References
    ----------
    Bernard Rosner, Fundamentals of Biostatistics
        * Eq 8.11, 8.21
    """

    # equivalent formulation:
    # v1 = sigma1**2.0
    # v2 = sigma2**2.0
    # if pooled:
    #     df = n1 + n2 - 2.0
    #     svar = ((n1 - 1.0) * v1 + (n2 - 1.0) * v2) / df
    #     se = np.sqrt(svar * (1.0 / n1 + 1.0 / n2))
    # else:
    #     vn1 = v1 / n1
    #     vn2 = v2 / n2
    #     df = (vn1 + vn2)**2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1))
    #     se = np.sqrt(vn1 + vn2)

    if pooled:
        df = n1 + n2 - 2.0
        sp = np.sqrt(((n1 - 1.0)*sigma1**2.0 + (n2 - 1.0)*sigma2**2.0) / df)
        se = sp * np.sqrt(1.0 / n1 + 1.0 / n2)
    else:
        C = (sigma1**2.0 / n1) / (sigma1**2.0 / n1 + sigma2**2.0 / n2)
        df = (n1 - 1.0) * (n2 - 1.0) / \
            ((n2 - 1.0) * C**2.0 + (1.0 - C)**2.0 * (n1 - 1.0))
        se = np.sqrt(sigma1**2.0 / n1 + sigma2**2.0 / n2)

    t_stat = (mu1 - mu2 - null_h) / se
    return t_stat, se, df


def two_samp_t(sample1: ndarray, sample2: ndarray, null_h: float = 0.0, pooled: bool = False) -> \
        Tuple[float, float, float]:
    """
    A simple function for running two sample student t tests.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample.
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to use the assumption that the sample variances are equal.
        pooled=True assumes that the variances are equal. The unpooled t test is sometimes
        called Welch's t test.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom

    Examples
    --------
    >>> t_stat, se, df = two_samp_t(
    ...     sample1=np.asarray([1, 2, 3]),
    ...     sample2=np.asarray([1, 2, 3]),
    ...     null_h=0.0, pooled=False,
    ... )

    References
    ----------
    Bernard Rosner, Fundamentals of Biostatistics
        * Eq 8.11, 8.21
    """

    n1 = sample1.shape[0]
    n2 = sample2.shape[0]
    mu1 = sample1.mean()
    mu2 = sample2.mean()
    sigma1 = sample1.std(ddof=1)
    sigma2 = sample2.std(ddof=1)
    return _two_samp_t(
        n1=n1, n2=n2,
        mu1=mu1, mu2=mu2,
        sigma1=sigma1, sigma2=sigma2,
        null_h=null_h, pooled=pooled,
    )