1""" 

2Frequentist Tests 

3 

4Note on estimating the population variance: We often use n-1 instead of n when estimating the 

5population variance (Bessel's correction), where n is the number of samples. This method corrects 

6the bias in the estimation of the population variance. It also partially corrects the bias in the 

7estimation of the population standard deviation. However, the correction often increases the mean 

8squared error in these estimations. When n is large this correction is small. 

9 

10TODO: add continuity corrections to unpooled z tests 

11""" 


from typing import Tuple
import logging

import numpy as np
from numpy import ndarray
from scipy.stats import norm, t
from scipy import stats

# set logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

__all__ = [
    'find_p_value', 'find_test_statistic', 'find_confidence_interval',
    # 'mean', 'variance', 'standard_deviation',
    'one_samp_z_prop', 'two_samp_z_prop',
    'one_samp_z', 'two_samp_z',
    'one_samp_t', 'two_samp_t',
]


################################################################################
# auxiliary functions


def find_p_value(test_statistic: float, df: float = np.inf, tails: bool = True) -> float:
    """
    A convenience function for finding p values for t tests and z tests.

    Notes
    -----
    * sf is the survival function, 1 - cdf

    Parameters
    ----------
    test_statistic: float
        The t or z test statistic.
    df: float
        The degrees of freedom. If infinite (np.inf), this is assumed to be a z test. Otherwise it
        is assumed to be a t test.
    tails: bool
        An indicator for two tailed tests. If True, this assumes a two tailed test. If False, this
        assumes a one tailed test.

    Returns
    -------
    float
        The p value corresponding to the test statistic.
    """

    tails = 2 if tails else 1
    if df < np.inf:
        return stats.t.sf(
            np.abs(test_statistic), loc=0.0, scale=1.0, df=df,
        ) * tails
    return stats.norm.sf(
        np.abs(test_statistic),
        loc=0.0, scale=1.0,
    ) * tails
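
# Usage sketch for find_p_value (illustrative; values are approximate):
#   find_p_value(1.96)               # two-sided z test  -> ~0.05
#   find_p_value(1.96, tails=False)  # one-sided z test  -> ~0.025
#   find_p_value(2.06, df=24)        # two-sided t test with 24 degrees of freedom -> ~0.05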


def find_test_statistic(p_value: float, df: float = np.inf, tails: bool = True) -> float:
    """
    A convenience function for recovering t and z test statistics from p values.

    Parameters
    ----------
    p_value: float
        The p value of interest.
    df: float
        The degrees of freedom. If infinite (np.inf), this is assumed to be a z test. Otherwise it
        is assumed to be a t test. For a t test, the degrees of freedom is usually the total number
        of samples minus one.
    tails: bool
        An indicator for two tailed tests. If True, this assumes a two tailed test. If False, this
        assumes a one tailed test.

    Returns
    -------
    float
        The corresponding test statistic.
    """

    if p_value <= 0.0 or p_value >= 1.0:
        raise ValueError("Input p_value must be a float between 0 and 1, non-inclusive.")

    p = 1.0 - p_value
    p = (1.0 + p) / 2.0 if tails else p
    if df == np.inf:
        return norm(loc=0.0, scale=1.0).ppf(p)
    return t(loc=0.0, scale=1.0, df=df).ppf(p)
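
# Usage sketch for find_test_statistic (illustrative; values are approximate):
#   find_test_statistic(0.05)                # -> ~1.96 (two-sided z)
#   find_test_statistic(0.05, df=24)         # -> ~2.06 (two-sided t)
#   find_test_statistic(0.05, tails=False)   # -> ~1.64 (one-sided z)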


def find_confidence_interval(se: float, df: float = np.inf, alpha: float = 0.05,
                             tails: bool = True) -> float:
    """
    A convenience function for finding the confidence interval half-width based on the standard
    error.

    Parameters
    ----------
    se: float
        The standard error of the measurement (estimate).
    df: float
        The degrees of freedom. If infinite (np.inf), this is assumed to be a z test. Otherwise it
        is assumed to be a t test.
    alpha: float
        The probability of making a type I error. A 95% confidence interval has alpha = 5% or .05.
    tails: bool
        An indicator for two tailed tests. If True, this assumes a two tailed test. If False, this
        assumes a one tailed test.

    Returns
    -------
    float
        The half-width of the confidence interval (margin of error), in absolute units.
    """
    tails = 2 if tails else 1
    confidence = 1.0 - alpha
    # two-sided: 1 - alpha / 2, one-sided: 1 - alpha
    q = (1.0 + confidence) / 2.0 if tails == 2 else confidence
    if df < np.inf:
        return se * stats.t.ppf(
            q=q, loc=0.0, scale=1.0, df=df,
        )
    return se * stats.norm.ppf(
        q=q, loc=0.0, scale=1.0,
    )
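
# Usage sketch for find_confidence_interval (illustrative; values are approximate):
#   find_confidence_interval(se=0.1)          # -> ~0.196, i.e. a 95% z interval of mean +/- 0.196
#   find_confidence_interval(se=0.1, df=24)   # -> ~0.206, the t-based analogue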


################################################################################
# educational functions (not for production use)


def mean(arr: ndarray) -> float:
    """
    An example of how the mean is calculated. This function is for educational purposes only.
    Please use np.mean(arr) instead.

    Parameters
    ----------
    arr: ndarray
        An array containing the data to calculate the mean.

    Returns
    -------
    float
        The mean (average).
    """
    logger.warning("Please use the mean function in the numpy project instead.")
    return np.sum(arr) / arr.shape[0]


def variance(arr: ndarray, ddof: int = 0) -> float:
    """
    An example of how the variance is calculated. This function is for educational purposes only.
    Please use np.var(arr, ddof=ddof) instead.

    Parameters
    ----------
    arr: ndarray
        An array containing the data to calculate the variance.
    ddof: int
        The delta degrees of freedom (0 for the biased estimator, 1 for Bessel's correction).

    Returns
    -------
    float
        The variance.
    """
    logger.warning("Please use the var function in the numpy project instead.")
    assert ddof >= 0, "Degrees of freedom must be greater than or equal to 0"
    # Number of observations
    n = arr.shape[0]
    assert ddof < n, "Degrees of freedom must be less than the number of observations"
    # Mean of the data
    mu = np.sum(arr) / n
    # Squared deviations
    deviations = (arr - mu)**2.0
    # Variance
    return np.sum(deviations) / (n - ddof)


def standard_deviation(arr: ndarray, ddof: int = 0) -> float:
    """
    An example of how the standard deviation is calculated. This function is for educational
    purposes only. Please use np.std(arr, ddof=ddof) instead.

    Parameters
    ----------
    arr: ndarray
        An array containing the data to calculate the standard deviation.
    ddof: int
        The delta degrees of freedom (0 for the biased estimator, 1 for Bessel's correction).

    Returns
    -------
    float
        The standard deviation.
    """
    logger.warning("Please use the std function in the numpy project instead.")
    return np.sqrt(variance(arr, ddof=ddof))


################################################################################
# normal approximation (z) proportions tests


def _one_samp_z_prop(n: int, successes: int, null_h: float = 0.5) -> Tuple[float, float]:
    """
    Function for a one sample z test of proportions.

    Parameters
    ----------
    n: int
        The number of samples (observations).
    successes: int
        The number of events.
    null_h: float
        The point null hypothesis to use when testing the proportion.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    assert successes <= n, "Input successes must be less than or equal to n."
    p_hat = successes / n
    # standard error of the sample proportion; n - 1 (rather than n) follows the note on
    # Bessel's correction in the module docstring
    se = np.sqrt(p_hat * (1.0 - p_hat) / (n - 1.0))
    z = (p_hat - null_h) / se
    return z, se


def one_samp_z_prop(sample: ndarray, null_h: float = 0.5) -> Tuple[float, float]:
    """
    Function for a one sample z test of proportions.

    Parameters
    ----------
    sample: ndarray
        An array of samples (binary observations).
    null_h: float
        The point null hypothesis to use when testing the proportion.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    n = sample.shape[0]
    successes = sample.sum()
    return _one_samp_z_prop(n=n, successes=successes, null_h=null_h)
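
# Usage sketch for one_samp_z_prop (illustrative): test a conversion rate against 50%.
#   sample = np.asarray([1, 0, 1, 1, 0, 1, 1, 0, 1, 1])
#   z, se = one_samp_z_prop(sample, null_h=0.5)
#   p = find_p_value(z)   # two-sided p value for H0: proportion == 0.5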


def paired_z_prop(sample1: ndarray, sample2: ndarray, null_h: float = 0.5) -> Tuple[float, float]:
    """
    Function for a paired z test of proportions. The math is the same as for a one sample z test
    of proportions applied to the element-wise differences.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample. Must be the same dimensions
        as sample1.
    null_h: float
        The point null hypothesis to use when comparing the proportions.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    return one_samp_z_prop(sample=sample1-sample2, null_h=null_h)


def _two_samp_z_prop(n1: int, n2: int, successes1: int, successes2: int, null_h: float = 0.0,
                     pooled: bool = False) -> Tuple[float, float]:
    """
    Function for a two sample z test of proportions.

    Parameters
    ----------
    n1: int
        The number of data points (observations) in sample one.
    n2: int
        The number of data points (observations) in sample two.
    successes1: int
        The number of events in sample one.
    successes2: int
        The number of events in sample two.
    null_h: float
        The point null hypothesis to use when comparing the proportions.
    pooled: bool
        Indicates whether to assume that the sample variances are equal. Pooled = True assumes
        that the variances are equal. It is common to use the pooled assumption given that the
        unpooled assumption yields overconfident estimates in practice (barring the appropriate
        corrections).

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    assert successes1 <= n1, "Input successes1 must be less than or equal to n1."
    assert successes2 <= n2, "Input successes2 must be less than or equal to n2."
    p1 = successes1 / n1
    p2 = successes2 / n2
    if pooled:
        # pooled proportion under the null of equal proportions
        p = (successes1 + successes2) / (n1 + n2)
        se = np.sqrt(p * (1.0 - p) * (1.0 / n1 + 1.0 / n2))
    else:
        se = np.sqrt(
            p1 * (1.0 - p1) / n1 +
            p2 * (1.0 - p2) / n2
        )
    z = (p1 - p2 - null_h) / se
    return z, se


def two_samp_z_prop(sample1: ndarray, sample2: ndarray, null_h: float = 0.0, pooled: bool = False) \
        -> Tuple[float, float]:
    """
    Function for a two sample z test of proportions.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample.
    null_h: float
        The point null hypothesis to use when comparing the proportions.
    pooled: bool
        Indicates whether to assume that the sample variances are equal. Pooled = True assumes
        that the variances are equal. It is common to use the pooled assumption given that the
        unpooled assumption yields overconfident estimates in practice (barring the appropriate
        corrections).

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    n1 = sample1.shape[0]
    n2 = sample2.shape[0]
    successes1 = sample1.sum()
    successes2 = sample2.sum()
    return _two_samp_z_prop(n1, n2, successes1, successes2, null_h=null_h, pooled=pooled)
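
# Usage sketch for two_samp_z_prop (illustrative): compare conversion rates of two variants.
#   variant_a = np.asarray([1, 0, 0, 1, 1, 0, 1, 0])
#   variant_b = np.asarray([1, 1, 0, 1, 1, 1, 0, 1])
#   z, se = two_samp_z_prop(variant_a, variant_b, pooled=True)
#   p = find_p_value(z)   # two-sided p value for H0: equal proportions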


################################################################################
# normal (z) tests


def _one_samp_z(n: int, mu: float, sigma: float, null_h: float = 0.0) -> Tuple[float, float]:
    """
    Function for a one sample z test.

    Parameters
    ----------
    n: int
        The number of samples (observations).
    mu: float
        The mean of the sample data.
    sigma: float
        The standard deviation of the sample data.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    # standard error of the mean
    se = sigma / np.sqrt(n)
    z = (mu - null_h) / se
    return z, se


def one_samp_z(sample: ndarray, null_h: float = 0.0) -> Tuple[float, float]:
    """
    Function for a one sample z test.

    Parameters
    ----------
    sample: ndarray
        An array of samples (observations).
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    n = sample.shape[0]
    mu = sample.mean()
    # sample standard deviation (Bessel's correction)
    sigma = sample.std(ddof=1)
    return _one_samp_z(n=n, mu=mu, sigma=sigma, null_h=null_h)
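
# Usage sketch for one_samp_z (illustrative): test whether the mean differs from zero.
#   z, se = one_samp_z(np.asarray([0.3, -0.1, 0.4, 0.2, 0.5]))
#   p = find_p_value(z)
#   ci = find_confidence_interval(se)   # 95% half-width around the sample mean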


def paired_z(sample1: ndarray, sample2: ndarray, null_h: float = 0.0) -> Tuple[float, float]:
    """
    Function for a paired z test. The math is the same as for a one sample z test applied to the
    element-wise differences.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample. Must be the same dimensions
        as sample1.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    return one_samp_z(sample=sample1-sample2, null_h=null_h)


def _two_samp_z(n1: int, n2: int, mu1: float, mu2: float, sigma1: float, sigma2: float,
                null_h: float = 0.0, pooled: bool = False) -> Tuple[float, float]:
    """
    Function for a two sample z test.

    Parameters
    ----------
    n1: int
        The sample size for the first sample.
    n2: int
        The sample size for the second sample.
    mu1: float
        The mean of the first sample.
    mu2: float
        The mean of the second sample.
    sigma1: float
        The standard deviation of the first sample (population convention, ddof=0).
    sigma2: float
        The standard deviation of the second sample (population convention, ddof=0).
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to assume that the sample variances are equal. Pooled = True assumes
        that the variances are equal. It is common to use the pooled assumption given that the
        unpooled assumption yields overconfident estimates in practice (barring the appropriate
        corrections).

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    if pooled:
        # pooled variance: n * sigma**2 recovers the sum of squared deviations when sigma is the
        # population (ddof=0) standard deviation
        se = np.sqrt(
            (n1 * sigma1 ** 2.0 + n2 * sigma2 ** 2.0) / (n1 + n2 - 2) * (1.0 / n1 + 1.0 / n2)
        )
    else:
        se = np.sqrt(sigma1**2.0 / n1 + sigma2**2.0 / n2)
    z = (mu1 - mu2 - null_h) / se
    return z, se


def two_samp_z(sample1: ndarray, sample2: ndarray, null_h: float = 0.0, pooled: bool = False) -> \
        Tuple[float, float]:
    """
    Function for a two sample z test.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample.
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to assume that the sample variances are equal. Pooled = True assumes
        that the variances are equal. It is common to use the pooled assumption given that the
        unpooled assumption yields overconfident estimates in practice (barring the appropriate
        corrections).

    Returns
    -------
    float
        test statistic (z statistic)
    float
        standard error
    """
    n1 = sample1.shape[0]
    n2 = sample2.shape[0]
    mu1 = sample1.mean()
    mu2 = sample2.mean()
    # population (ddof=0) standard deviations, matching the n * sigma**2 pooled formula in
    # _two_samp_z
    sigma1 = sample1.std()
    sigma2 = sample2.std()
    return _two_samp_z(n1=n1, n2=n2, mu1=mu1, mu2=mu2, sigma1=sigma1, sigma2=sigma2, null_h=null_h,
                       pooled=pooled)
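
# Usage sketch for two_samp_z (illustrative): compare two sample means with a pooled variance.
#   z, se = two_samp_z(sample1, sample2, pooled=True)   # sample1, sample2: 1-d numpy arrays
#   p = find_p_value(z)
#   ci = find_confidence_interval(se)   # 95% half-width around the difference in means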


################################################################################
# student t tests


def _one_samp_t(n: int, mu: float, sigma: float, null_h: float = 0.0) -> Tuple[float, float, float]:
    """
    Function for a one sample t test.

    Parameters
    ----------
    n: int
        The number of samples (observations).
    mu: float
        The mean of the sample data.
    sigma: float
        The standard deviation of the sample data (sample convention, ddof=1).
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom
    """
    se = sigma / np.sqrt(n)
    # named t_stat to avoid shadowing scipy.stats.t imported above
    t_stat = (mu - null_h) / se
    df = n - 1.0
    return t_stat, se, df


def one_samp_t(sample: ndarray, null_h: float = 0.0) -> Tuple[float, float, float]:
    """
    Function for a one sample t test.

    Parameters
    ----------
    sample: ndarray
        An array of samples (observations).
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom
    """
    n = sample.shape[0]
    mu = sample.mean()
    # sample standard deviation (Bessel's correction)
    sigma = sample.std(ddof=1)
    return _one_samp_t(n=n, mu=mu, sigma=sigma, null_h=null_h)
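
# Usage sketch for one_samp_t (illustrative): small-sample test of a mean against 5.0.
#   t_stat, se, df = one_samp_t(np.asarray([5.1, 4.9, 5.3, 5.0]), null_h=5.0)
#   p = find_p_value(t_stat, df=df)
#   ci = find_confidence_interval(se, df=df)   # 95% half-width around the sample mean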


def paired_t(sample1: ndarray, sample2: ndarray, null_h: float = 0.0) -> Tuple[float, float, float]:
    """
    Function for a paired t test. The math is the same as for a one sample t test applied to the
    element-wise differences.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample. Must be the same dimensions
        as sample1.
    null_h: float
        The point null hypothesis to use when comparing the means.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom
    """
    return one_samp_t(sample=sample1-sample2, null_h=null_h)


def _two_samp_t(n1: int, n2: int, mu1: float, mu2: float, sigma1: float, sigma2: float,
                null_h: float = 0.0, pooled: bool = False) -> Tuple[float, float, float]:
    """
    A simple function for running two sample student t tests.

    The standard deviations should be sample standard deviations, calculated with one delta degree
    of freedom, e.g. np.std(arr, ddof=1).

    Parameters
    ----------
    n1: int
        The sample size for the first sample.
    n2: int
        The sample size for the second sample.
    mu1: float
        The mean of the first sample.
    mu2: float
        The mean of the second sample.
    sigma1: float
        The standard deviation of the first sample.
    sigma2: float
        The standard deviation of the second sample.
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to assume that the sample variances are equal. Pooled = True assumes
        that the variances are equal. The unpooled t test is sometimes called Welch's t test.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom

    Examples
    --------
    >>> t_stat, se, df = _two_samp_t(
    ...     n1=13, n2=10,
    ...     mu1=1.1, mu2=1.0,
    ...     sigma1=3.0, sigma2=2.0,
    ...     null_h=0.0, pooled=False,
    ... )

    References
    ----------
    Bernard Rosner
    * Eq 8.11, 8.21 Fundamentals of Biostatistics
    """

    # An equivalent formulation in terms of variances:
    # v1 = sigma1**2.0
    # v2 = sigma2**2.0
    # if pooled:
    #     df = n1 + n2 - 2.0
    #     svar = ((n1 - 1.0) * v1 + (n2 - 1.0) * v2) / df
    #     se = np.sqrt(svar * (1.0 / n1 + 1.0 / n2))
    # else:
    #     vn1 = v1 / n1
    #     vn2 = v2 / n2
    #     df = (vn1 + vn2)**2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1))
    #     se = np.sqrt(vn1 + vn2)

    if pooled:
        df = n1 + n2 - 2.0
        # pooled sample standard deviation
        sp = np.sqrt(((n1 - 1.0)*sigma1**2.0 + (n2 - 1.0)*sigma2**2.0) / df)
        se = sp * np.sqrt(1.0 / n1 + 1.0 / n2)
    else:
        # Welch-Satterthwaite degrees of freedom
        C = (sigma1**2.0 / n1) / (sigma1**2.0 / n1 + sigma2**2.0 / n2)
        df = (n1 - 1.0) * (n2 - 1.0) / \
            ((n2 - 1.0) * C**2.0 + (1.0 - C)**2.0 * (n1 - 1.0))
        se = np.sqrt(sigma1**2.0 / n1 + sigma2**2.0 / n2)

    t_stat = (mu1 - mu2 - null_h) / se
    return t_stat, se, df


def two_samp_t(sample1: ndarray, sample2: ndarray, null_h: float = 0.0, pooled: bool = False) -> \
        Tuple[float, float, float]:
    """
    A simple function for running two sample student t tests.

    Parameters
    ----------
    sample1 : ndarray
        A numpy array with the unit level data from the first sample.
    sample2 : ndarray
        A numpy array with the unit level data from the second sample.
    null_h: float
        The point null hypothesis to use when comparing the means.
    pooled: bool
        Indicates whether to assume that the sample variances are equal. Pooled = True assumes
        that the variances are equal. The unpooled t test is sometimes called Welch's t test.

    Returns
    -------
    float
        test statistic (t statistic)
    float
        standard error
    float
        degrees of freedom

    Examples
    --------
    >>> t_stat, se, df = two_samp_t(
    ...     sample1=np.asarray([1, 2, 3]),
    ...     sample2=np.asarray([1, 2, 3]),
    ...     null_h=0.0, pooled=False,
    ... )

    References
    ----------
    Bernard Rosner
    * Eq 8.11, 8.21 Fundamentals of Biostatistics
    """

    n1 = sample1.shape[0]
    n2 = sample2.shape[0]
    mu1 = sample1.mean()
    mu2 = sample2.mean()
    # sample standard deviations (Bessel's correction)
    sigma1 = sample1.std(ddof=1)
    sigma2 = sample2.std(ddof=1)
    return _two_samp_t(
        n1=n1, n2=n2,
        mu1=mu1, mu2=mu2,
        sigma1=sigma1, sigma2=sigma2,
        null_h=null_h, pooled=pooled,
    )
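

if __name__ == "__main__":
    # Minimal sanity-check sketch (illustrative, not a formal test suite): compare the Welch
    # two sample t test above against scipy.stats.ttest_ind on simulated data.
    rng = np.random.default_rng(0)
    group_a = rng.normal(loc=0.0, scale=1.0, size=30)
    group_b = rng.normal(loc=0.5, scale=1.5, size=40)

    t_stat, se, df = two_samp_t(group_a, group_b, pooled=False)
    p_value = find_p_value(t_stat, df=df)
    logger.info("two_samp_t (Welch): t=%.4f se=%.4f df=%.1f p=%.4f", t_stat, se, df, p_value)

    # Reference result from scipy (Welch's t test when equal_var=False).
    reference = stats.ttest_ind(group_a, group_b, equal_var=False)
    logger.info("scipy ttest_ind:    t=%.4f p=%.4f", reference.statistic, reference.pvalue)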