π¦MDE#

Мощностью статистического критерия называют вероятность принять альтернативную гипотезу в случае, если она верна.
MDE (minimum detectable effect) - это минимальный эффект, который можно обнаружить при выбранном уровне значимости и мощности.
Математическое обоснование#
Пусть \(X_1, X_2, ..., X_n \sim N(\mu, \sigma^2_0)\)
Гипотезы:
\(H_0: \mu = \mu_0\)
\(H_1: \mu = \mu_1\), (\(\mu_0 < \mu_1\))
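Воспользуемся критерием отношения правдоподобия:
\[T(X) = \frac{L(X | H_1)}{L(X | H_0)} = \exp\left(\frac{\mu_1 - \mu_0}{\sigma^2_0}\sum_{i=1}^n X_i - \frac{n(\mu_1^2 - \mu_0^2)}{2\sigma^2_0}\right)\]
Эта величина зависит от выборки только в одном месте - через \(\sum_{i=1}^n X_i\), и возрастает по этой сумме, так как \(\mu_1 > \mu_0\). Поэтому критерий имеет вид \(T(X) \geq c^*(\alpha)\) или, что эквивалентно, \(\sum_{i=1}^n X_i \geq c(\alpha)\), где \(\alpha\) - уровень статистической значимости.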
Мы хотим зафиксировать ошибку 1 рода: \(P(\sum X_i \geq c | H_0) \leq \alpha\). Т.е. хотим подобрать такую \(c\), чтобы наша статистика выходила за ее пределы с вероятностью \(\leq \alpha\).
По ЦПТ \(\sum X_i \sim N(n\mu, nVar(X_i))\). В нашем случае \(\sum X_i \sim N(n\mu, n\sigma^2_0)\)
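Отнормируем и рассмотрим крайний случай, когда \(P(\cdot) = \alpha\):
\[P\left(\frac{\sum X_i - n\mu_0}{\sqrt{n}\sigma_0} \geq \frac{c - n\mu_0}{\sqrt{n}\sigma_0} \,\Big|\, H_0\right) = \alpha\]
Перепишем в следующем виде:
\[1 - F_{N(0, 1)}\left(\frac{c - n\mu_0}{\sqrt{n}\sigma_0}\right) = \alpha\]
Выразим \(c\):
\[c = n\mu_0 + \sqrt{n}\sigma_0 F^{-1}_{N(0, 1)}(1 - \alpha)\]
Заметим, что \(c\) не зависит от \(\mu_1\), поэтому критерий один и тот же \(\forall \mu_1: \mu_1 > \mu_0\).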
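Разберемся теперь с ошибкой 2 рода, т.е. потребуем \(P(\sum X_i \geq c | H_1) \geq 1-\beta\). Распишем:
\[P\left(\sum X_i \geq c \,\Big|\, H_1\right) = 1 - F_{N(0, 1)}\left(\frac{c - n\mu_1}{\sqrt{n}\sigma_0}\right) \geq 1 - \beta\]
Подставим сюда \(c\), полученное ранее:
\[F_{N(0, 1)}\left(F^{-1}_{N(0, 1)}(1 - \alpha) - \frac{\sqrt{n}(\mu_1 - \mu_0)}{\sigma_0}\right) \leq \beta\]
Пусть \(\mu_1 - \mu_0 = \varepsilon\) - ожидаемый эффект в абсолютных величинах.
Выразим \(\varepsilon\):
\[\varepsilon \geq \frac{\sigma_0}{\sqrt{n}}\left(F^{-1}_{N(0, 1)}(1 - \alpha) - F^{-1}_{N(0, 1)}(\beta)\right)\]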
Покажем, что \(F^{-1}_{N(0, 1)}(\beta) = - F^{-1}_{N(0, 1)}(1-\beta)\)
Пусть \(F_{N(0, 1)}(x) = \beta\). Известно, что \(F_{N(0, 1)}(x) + F_{N(0, 1)}(-x) = 1\), тогда
\(F_{N(0, 1)}(-x) = 1 - F_{N(0, 1)}(x) = 1 - \beta\)
\(-x = F^{-1}_{N(0, 1)}(1-\beta)\)
С другой стороны: \(x = F^{-1}_{N(0, 1)}(\beta)\)
Подставив, получим доказываемое равенство.
\[MDE = \varepsilon = \frac{\sigma_0}{\sqrt{n}}\left(F^{-1}_{N(0, 1)}(1 - \alpha) + F^{-1}_{N(0, 1)}(1 - \beta)\right)\]
Эта формула показывает, какой размер эффекта \(\varepsilon\) мы способны обнаружить при заданных
\(\alpha\) и \(\beta\), выборке размера \(n\) и дисперсии \(\sigma^2_0\).
Резюме
MDE - минимальный эффект, который можем поймать. Для двух выборок одинакового размера \(n\) и двустороннего критерия:
\[MDE = \varepsilon = \left(F^{-1}_{N(0, 1)}(1 - \alpha/2) + F^{-1}_{N(0, 1)}(1 - \beta)\right)\sqrt{\frac{\sigma^2_x + \sigma^2_y}{n}}\]
\(\varepsilon\) - размер эффекта (в абсолютных величинах)
\(\alpha\) - допустимая ошибка 1 рода
\(\beta\) - допустимая ошибка 2 рода
\(\sigma^2_x, \sigma^2_y\) - дисперсии выборок
\(n\) - размер выборки
Оценим размер выборки, который необходим, чтобы обнаружить ожидаемый эффект при фиксированных ошибках первого и второго рода:
\[n = \frac{\left(F^{-1}_{N(0, 1)}(1 - \alpha/2) + F^{-1}_{N(0, 1)}(1 - \beta)\right)^2 (\sigma^2_x + \sigma^2_y)}{\varepsilon^2}\]
По сути, влиять мы можем только на дисперсии. И об этом дальше.
import math
from IPython.display import display
import numpy as np
from scipy import stats
import pandas as pd
class Mde:
    """Minimum detectable effect (MDE) and sample-size helpers.

    All calculations use standard-normal quantiles:

    * MDE: ``(z_alpha + z_beta) * sqrt(std_x**2 / n_x + std_y**2 / n_y)``
    * sample size: ``ceil((z_alpha + z_beta)**2 * (std_x**2 + std_y**2)
      / effect**2)``

    where ``z_alpha`` is the ``1 - alpha/2`` quantile for a two-sided test
    (``1 - alpha`` for a one-sided test) and ``z_beta`` is the ``1 - beta``
    quantile.
    """

    # Default grids for ``get_sample_size_table``. Tuples are used so the
    # defaults cannot be mutated between calls.
    _DEFAULT_EFFECTS = (0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1)
    _DEFAULT_ALPHAS = (0.01, 0.03, 0.05, 0.1)
    _DEFAULT_BETAS = (0.05, 0.1, 0.15, 0.2, 0.25)

    @staticmethod
    def _quantile_sum(
        first_type_error: float,
        second_type_error: float,
        two_sided: bool
    ) -> float:
        """Return ``z_alpha + z_beta`` for the given error rates."""
        # A two-sided test splits alpha between the two tails.
        tails = 2 if two_sided else 1
        z_alpha = stats.norm.ppf(1 - first_type_error / tails)
        z_beta = stats.norm.ppf(1 - second_type_error)
        return z_alpha + z_beta

    @classmethod
    def get_diff(
        cls,
        first_type_error: float,
        second_type_error: float,
        n_x: int,
        std_x: float,
        n_y: int = None,
        std_y: float = None,
        two_sided: bool = True
    ) -> float:
        """Return the minimum detectable effect in absolute units.

        Args:
            first_type_error: Type I error rate (alpha).
            second_type_error: Type II error rate (beta).
            n_x: Size of the first sample.
            std_x: Standard deviation of the first sample.
            n_y: Size of the second sample; defaults to ``n_x``.
            std_y: Standard deviation of the second sample; defaults to
                ``std_x``.
            two_sided: Use ``alpha / 2`` (two-sided test) when true,
                ``alpha`` (one-sided test) otherwise.

        Returns:
            ``(z_alpha + z_beta) * sqrt(std_x**2 / n_x + std_y**2 / n_y)``.
        """
        if std_y is None:
            std_y = std_x
        if n_y is None:
            n_y = n_x
        standard_error = math.sqrt(std_x**2 / n_x + std_y**2 / n_y)
        z_sum = cls._quantile_sum(first_type_error, second_type_error, two_sided)
        return z_sum * standard_error

    @classmethod
    def get_sample_size_abs(
        cls,
        effect: float,
        first_type_error: float,
        second_type_error: float,
        std_x: float,
        std_y: float = None,
        two_sided: bool = True
    ) -> int:
        """Return the sample size needed to detect an absolute effect.

        This is ``get_diff`` solved for ``n`` with ``n_x == n_y == n``, so
        the result is the size of each of the two samples, rounded up.

        Args:
            effect: Effect to detect, in absolute units (must be non-zero).
            first_type_error: Type I error rate (alpha).
            second_type_error: Type II error rate (beta).
            std_x: Standard deviation of the first sample.
            std_y: Standard deviation of the second sample; defaults to
                ``std_x``.
            two_sided: Use ``alpha / 2`` when true, ``alpha`` otherwise.

        Returns:
            ``ceil((z_alpha + z_beta)**2 * (std_x**2 + std_y**2)
            / effect**2)``.

        Raises:
            ZeroDivisionError: If ``effect`` is zero.
        """
        if std_y is None:
            std_y = std_x
        z_sum = cls._quantile_sum(first_type_error, second_type_error, two_sided)
        variance_to_effect = (std_x**2 + std_y**2) / effect**2
        return math.ceil(z_sum**2 * variance_to_effect)

    @classmethod
    def get_sample_size_arb(
        cls,
        mu: float,
        effect_percantage: float,
        first_type_error: float,
        second_type_error: float,
        std_x: float,
        std_y: float = None,
        two_sided: bool = True
    ) -> int:
        """Return the sample size needed to detect a relative effect.

        The relative effect is converted to an absolute one,
        ``mu * (1 + effect_percantage) - mu``, and passed to
        ``get_sample_size_abs``.

        Args:
            mu: Baseline mean the relative effect is applied to.
            effect_percantage: Relative effect as a fraction
                (``0.05`` means 5%).
            first_type_error: Type I error rate (alpha).
            second_type_error: Type II error rate (beta).
            std_x: Standard deviation of the first sample.
            std_y: Standard deviation of the second sample; defaults to
                ``std_x``.
            two_sided: Use ``alpha / 2`` when true, ``alpha`` otherwise.

        Returns:
            The value of ``get_sample_size_abs`` for the absolute effect.
        """
        shifted_mu = mu * (1 + effect_percantage)
        return cls.get_sample_size_abs(
            shifted_mu - mu,
            first_type_error,
            second_type_error,
            std_x,
            std_y,
            two_sided,
        )

    @classmethod
    def _build_sample_size_frame(
        cls,
        mu: float,
        std_x: float,
        effect_percantage: list[float],
        first_type_error: list[float],
        second_type_error: list[float],
        std_y: float,
        two_sided: bool
    ) -> pd.DataFrame:
        """Build the sample-size table shown by ``get_sample_size_table``.

        Rows are indexed by ``(alpha, effect)``; there is one column per
        beta value.
        """
        # Materialise the inputs: the effect and beta grids are traversed
        # once per alpha, and the column header needs indexing/slicing.
        alphas = list(first_type_error)
        effects = list(effect_percantage)
        betas = list(second_type_error)

        row_labels = []
        rows = []
        for alpha in alphas:
            for effect in effects:
                # ``:g`` formats the percentage without truncation:
                # int(0.29 * 100) is 28, whereas this yields "29%".
                row_labels.append((alpha, f'{effect * 100:g}%'))
                rows.append([
                    cls.get_sample_size_arb(
                        mu=mu,
                        effect_percantage=effect,
                        first_type_error=alpha,
                        second_type_error=beta,
                        std_x=std_x,
                        std_y=std_y,
                        two_sided=two_sided,
                    )
                    for beta in betas
                ])

        index = pd.MultiIndex.from_tuples(row_labels, names=['alpha', 'effect'])
        # Only the first column carries the "beta:" prefix; the remaining
        # columns are labelled with the bare beta values.
        columns = [f'beta: {betas[0]}', *betas[1:]]
        return pd.DataFrame(rows, index=index, columns=columns)

    @classmethod
    def get_sample_size_table(
        cls,
        mu: float,
        std_x: float,
        effect_percantage: list[float] = None,
        first_type_error: list[float] = None,
        second_type_error: list[float] = None,
        std_y: float = None,
        two_sided: bool = True
    ) -> None:
        """Display a table of sample sizes over grids of alpha, effect, beta.

        Args:
            mu: Baseline mean the relative effects are applied to.
            std_x: Standard deviation of the first sample.
            effect_percantage: Relative effects as fractions; defaults to
                1%..10% in 1% steps.
            first_type_error: Type I error rates; defaults to
                0.01, 0.03, 0.05, 0.1.
            second_type_error: Type II error rates; defaults to
                0.05, 0.1, 0.15, 0.2, 0.25.
            std_y: Standard deviation of the second sample; defaults to
                ``std_x``.
            two_sided: Use ``alpha / 2`` when true, ``alpha`` otherwise.

        Returns:
            None. The table is rendered with ``display``.
        """
        if effect_percantage is None:
            effect_percantage = cls._DEFAULT_EFFECTS
        if first_type_error is None:
            first_type_error = cls._DEFAULT_ALPHAS
        if second_type_error is None:
            second_type_error = cls._DEFAULT_BETAS

        frame = cls._build_sample_size_frame(
            mu,
            std_x,
            effect_percantage,
            first_type_error,
            second_type_error,
            std_y,
            two_sided,
        )
        # Lift the row limit so the whole grid is rendered.
        with pd.option_context('display.max_rows', None):
            display(frame)
# Show the required sample sizes for a baseline mean of 10 and a standard
# deviation of 1, using the method's default alpha / effect / beta grids.
Mde.get_sample_size_table(10, 1)
| beta: 0.05 | 0.1 | 0.15 | 0.2 | 0.25 | ||
|---|---|---|---|---|---|---|
| alpha | effect | |||||
| 0.01 | 1% | 3563 | 2976 | 2610 | 2336 | 2113 |
| 2% | 891 | 744 | 653 | 584 | 529 | |
| 3% | 396 | 331 | 290 | 260 | 235 | |
| 4% | 223 | 186 | 164 | 146 | 133 | |
| 5% | 143 | 120 | 105 | 94 | 85 | |
| 6% | 99 | 83 | 73 | 65 | 59 | |
| 7% | 73 | 61 | 54 | 48 | 44 | |
| 8% | 56 | 47 | 41 | 37 | 34 | |
| 9% | 44 | 37 | 33 | 29 | 27 | |
| 10% | 36 | 30 | 27 | 24 | 22 | |
| 0.03 | 1% | 2911 | 2383 | 2057 | 1815 | 1619 |
| 2% | 728 | 596 | 515 | 454 | 405 | |
| 3% | 324 | 265 | 229 | 202 | 180 | |
| 4% | 182 | 149 | 129 | 114 | 102 | |
| 5% | 117 | 96 | 83 | 73 | 65 | |
| 6% | 81 | 67 | 58 | 51 | 45 | |
| 7% | 60 | 49 | 42 | 38 | 34 | |
| 8% | 46 | 38 | 33 | 29 | 26 | |
| 9% | 36 | 30 | 26 | 23 | 20 | |
| 10% | 30 | 24 | 21 | 19 | 17 | |
| 0.05 | 1% | 2599 | 2102 | 1796 | 1570 | 1389 |
| 2% | 650 | 526 | 449 | 393 | 348 | |
| 3% | 289 | 234 | 200 | 175 | 155 | |
| 4% | 163 | 132 | 113 | 99 | 87 | |
| 5% | 104 | 85 | 72 | 63 | 56 | |
| 6% | 73 | 59 | 50 | 44 | 39 | |
| 7% | 54 | 43 | 37 | 33 | 29 | |
| 8% | 41 | 33 | 29 | 25 | 22 | |
| 9% | 33 | 26 | 23 | 20 | 18 | |
| 10% | 26 | 22 | 18 | 16 | 14 | |
| 0.10 | 1% | 2165 | 1713 | 1438 | 1237 | 1076 |
| 2% | 542 | 429 | 360 | 310 | 269 | |
| 3% | 241 | 191 | 160 | 138 | 120 | |
| 4% | 136 | 108 | 90 | 78 | 68 | |
| 5% | 87 | 69 | 58 | 50 | 44 | |
| 6% | 61 | 48 | 40 | 35 | 30 | |
| 7% | 45 | 35 | 30 | 26 | 22 | |
| 8% | 34 | 27 | 23 | 20 | 17 | |
| 9% | 27 | 22 | 18 | 16 | 14 | |
| 10% | 22 | 18 | 15 | 13 | 11 |