$$ \newcommand{\defeq}{\stackrel{\small\bullet}{=}} \newcommand{\ra}{\rangle} \newcommand{\la}{\langle} \newcommand{\norm}[1]{\left\|#1\right\|} \newcommand{\abs}[1]{\left\lvert#1\right\rvert} \newcommand{\Abs}[1]{\Bigl\lvert#1\Bigr\rvert} \newcommand{\pr}{{\mathbb P}} \newcommand{\qr}{{\mathbb Q}} \newcommand{\xv}{{\boldsymbol{x}}} \newcommand{\av}{{\boldsymbol{a}}} \newcommand{\bv}{{\boldsymbol{b}}} \newcommand{\cv}{{\boldsymbol{c}}} \newcommand{\dv}{{\boldsymbol{d}}} \newcommand{\ev}{{\boldsymbol{e}}} \newcommand{\fv}{{\boldsymbol{f}}} \newcommand{\gv}{{\boldsymbol{g}}} \newcommand{\hv}{{\boldsymbol{h}}} \newcommand{\nv}{{\boldsymbol{n}}} \newcommand{\sv}{{\boldsymbol{s}}} \newcommand{\tv}{{\boldsymbol{t}}} \newcommand{\uv}{{\boldsymbol{u}}} \newcommand{\vv}{{\boldsymbol{v}}} \newcommand{\wv}{{\boldsymbol{w}}} \newcommand{\zerov}{{\mathbf{0}}} \newcommand{\onev}{{\mathbf{1}}} \newcommand{\phiv}{{\boldsymbol{\phi}}} \newcommand{\cc}{{\check{C}}} \newcommand{\xv}{{\boldsymbol{x}}} \newcommand{\Xv}{{\boldsymbol{X}\!}} \newcommand{\yv}{{\boldsymbol{y}}} \newcommand{\Yv}{{\boldsymbol{Y}}} \newcommand{\zv}{{\boldsymbol{z}}} \newcommand{\Zv}{{\boldsymbol{Z}}} \newcommand{\Iv}{{\boldsymbol{I}}} \newcommand{\Jv}{{\boldsymbol{J}}} \newcommand{\Cv}{{\boldsymbol{C}}} \newcommand{\Ev}{{\boldsymbol{E}}} \newcommand{\Fv}{{\boldsymbol{F}}} \newcommand{\Gv}{{\boldsymbol{G}}} \newcommand{\Hv}{{\boldsymbol{H}}} \newcommand{\alphav}{{\boldsymbol{\alpha}}} \newcommand{\epsilonv}{{\boldsymbol{\epsilon}}} \newcommand{\betav}{{\boldsymbol{\beta}}} \newcommand{\deltav}{{\boldsymbol{\delta}}} \newcommand{\gammav}{{\boldsymbol{\gamma}}} \newcommand{\etav}{{\boldsymbol{\eta}}} \newcommand{\piv}{{\boldsymbol{\pi}}} \newcommand{\thetav}{{\boldsymbol{\theta}}} \newcommand{\tauv}{{\boldsymbol{\tau}}} \newcommand{\muv}{{\boldsymbol{\mu}}} \newcommand{\phiinv}{\Phi^{-1}} \newcommand{\Fiinv}{F^{-1}} \newcommand{\giinv}{g^{-1}} \newcommand{\fhat}{\hat{f}} \newcommand{\ghat}{\hat{g}} 
\newcommand{\ftheta}{f_\theta} \newcommand{\fthetav}{f_{\thetav}} \newcommand{\gtheta}{g_\theta} \newcommand{\gthetav}{g_{\thetav}} \newcommand{\ztheta}{Z_\theta} \newcommand{\xtheta}{\Xv_\theta} \newcommand{\ytheta}{\Yv_\theta} \newcommand{\p}{\partial} \newcommand{\f}{\frac} \newcommand{\cf}{\cfrac} \newcommand{\e}{\epsilon} \newcommand{\indep}{\perp\kern-5pt \perp} \newcommand{\inner}[1]{\langle#1\rangle} \newcommand{\pa}[1]{\left(#1\right)} \newcommand{\pb}[1]{\left\{#1\right\}} \newcommand{\pc}[1]{\left[#1\right]} \newcommand{\pA}[1]{\Big(#1\Big)} \newcommand{\pB}[1]{\Big\{#1\Big\}} \newcommand{\pC}[1]{\Big[#1\Big]} \newcommand{\ty}[1]{\texttt{#1}} \newcommand{\borel}[1]{\mathscr{B}\pa{#1}} \newcommand{\scr}{\mathcal} \newcommand{\scrb}{\mathscr} \newcommand{\argmin}{\mathop{\text{arg}\ \!\text{min}}} \newcommand{\arginf}{\mathop{\text{arg}\ \!\text{inf}}} \newcommand{\argmax}{\mathop{\text{arg}\ \!\text{max}}} \newcommand{\argsup}{\mathop{\text{arg}\ \!\text{sup}}} \newcommand{\bigo}[1]{\mathcal{O}_{p}\!\left(#1\right)} \newcommand{\f}{\frac} \newcommand{\e}{\epsilon} \newcommand{\inv}{^{-1}} \newcommand{\phiinv}{\Phi^{-1}} \newcommand{\Fiinv}{F^{-1}} \newcommand{\giinv}{g^{-1}} \newcommand{\fhat}{\hat{f}} \newcommand{\ghat}{\hat{g}} \newcommand{\ftheta}{f_\theta} \newcommand{\fthetav}{f_{\thetav}} \newcommand{\gtheta}{g_\theta} \newcommand{\gthetav}{g_{\thetav}} \newcommand{\ztheta}{Z_\theta} \newcommand{\xtheta}{\Xv_\theta} \newcommand{\ytheta}{\Yv_\theta} \newcommand{\absdet}[1]{\abs{\det\pa{#1}}} \newcommand{\jac}[1]{\Jv_{#1}} \newcommand{\absdetjx}[1]{\abs{\det\pa{\Jv_{#1}}}} \newcommand{\absdetj}[1]{\norm{\Jv_{#1}}} \newcommand{\sint}{sin(\theta)} \newcommand{\cost}{cos(\theta)} \newcommand{\sor}[1]{S\mathcal{O}(#1)} \newcommand{\ort}[1]{\mathcal{O}(#1)} \newcommand{\A}{{\mathcal A}} \newcommand{\C}{{\mathbb C}} \newcommand{\E}{{\mathbb E}} \newcommand{\F}{{\mathcal{F}}} \newcommand{\N}{{\mathbb N}} \newcommand{\R}{{\mathbb R}} \newcommand{\Q}{{\mathbb 
Q}} \newcommand{\Z}{{\mathbb Z}} \newcommand{\X}{{\mathbb{X}}} \newcommand{\Y}{{\mathbb{Y}}} \newcommand{\G}{{\mathcal{G}}} \newcommand{\M}{{\mathcal{M}}} \newcommand{\betaequivalent}{\beta\text{-equivalent}} \newcommand{\betaequivalence}{\beta\text{-equivalence}} \newcommand{\Mb}{{\boldsymbol{\mathsf{M}}}} \newcommand{\Br}{{\mathbf{\mathsf{Bar}}}} \newcommand{\dgm}{{\mathfrak{Dgm}}} \newcommand{\Db}{{\mathbf{\mathsf{D}}}} \newcommand{\Img}{{\mathbf{\mathsf{Img}}}} \newcommand{\mmd}{{\mathbf{\mathsf{MMD}}}} \newcommand{\Xn}{{\mathbb{X}_n}} \newcommand{\Xm}{{\mathbb{X}_m}} \newcommand{\Yn}{{\mathbb{Y}_n}} \newcommand{\Ym}{Y_1, Y_2, \cdots, Y_m} \newcommand{\Xb}{{\mathbb{X}}} \newcommand{\Yb}{{\mathbb{Y}}} \newcommand{\s}{{{\sigma}}} \newcommand{\fnsbar}{{\bar{f}^n_\s}} \newcommand{\fns}{{f^n_\s}} \newcommand{\fs}{{f_\s}} \newcommand{\fsbar}{{\bar{f}_\s}} \newcommand{\barfn}{{{f}^n_\sigma}} \newcommand{\barfnm}{{{f}^{n+m}_\sigma}} \newcommand{\barfo}{{{f}_\sigma}} \newcommand{\fn}{{f^n_{\rho,\sigma}}} \newcommand{\fnm}{{f^{n+m}_{\rho,\sigma}}} \newcommand{\fo}{{f_{\rho,\sigma}}} \newcommand{\K}{{{K_{\sigma}}}} \newcommand{\barpn}{{\bar{p}^n_\sigma}} \newcommand{\barpo}{{\bar{p}_\sigma}} \newcommand{\pn}{{p^n_\sigma}} \newcommand{\po}{{p_\sigma}} \newcommand{\J}{{\mathcal{J}}} \newcommand{\B}{{\mathcal{B}}} \newcommand{\pt}{{\tilde{\mathbb{P}}}} \newcommand{\Winf}{{W_{\infty}}} \newcommand{\winf}{{W_{\infty}}} \newcommand{\HH}{{{\scr{H}_{\sigma}}}} \newcommand{\D}{{{\scr{D}_{\sigma}}}} \newcommand{\Ts}{{T_{\sigma}}} \newcommand{\Phis}{{\Phi_{\sigma}}} \newcommand{\nus}{{\nu_{\sigma}}} \newcommand{\Qs}{{\mathcal{Q}_{\sigma}}} \newcommand{\ws}{{w_{\sigma}}} \newcommand{\vs}{{v_{\sigma}}} \newcommand{\ds}{{\delta_{\sigma}}} \newcommand{\fp}{{f_{\pr}}} \newcommand{\prs}{{\widetilde{\pr}_{\sigma}}} \newcommand{\qrs}{{\widetilde{\qr}_{\sigma}}} \newcommand{\Inner}[1]{\Bigl\langle#1\Bigr\rangle} \newcommand{\innerh}[1]{\langle#1\rangle_{\HH}} 
\newcommand{\Innerh}[1]{\Bigl\langle#1\Bigr\rangle_{\HH}} \newcommand{\normh}[1]{\norm{#1}_{\HH}} \newcommand{\norminf}[1]{\norm{#1}_{\infty}} \newcommand{\gdelta}{{\G_{\delta}}} \newcommand{\supgdelta}{{\sup\limits_{g\in\gdelta}\abs{\Delta_n(g)}}} \newcommand{\id}{\text{id}} \newcommand{\supp}{\text{supp}} \newcommand{\cech}{\v{C}ech} \newcommand{\Zz}{{\scr{Z}}} \newcommand{\psis}{\psi_\s} \newcommand{\phigox}{\Phis(\xv)-g} \newcommand{\phigoy}{\Phis(\yv)-g} \newcommand{\fox}{{f^{\epsilon,{\xv}}_{\rho,\sigma}}} \newcommand{\prx}{{\pr^{\epsilon}_{\xv}}} \newcommand{\pro}{{\pr_0}} \newcommand{\dotfo}{\dot{f}_{\!\!\rho,\s}} \newcommand{\phifo}{{\Phis(\yv)-\fo}} \newcommand{\phifox}{{\Phis(\xv)-\fo}} \newcommand{\kinf}{{\norm{\K}_{\infty}}} \newcommand{\half}{{{\f{1}{2}}}} \newcommand{\Jx}{\J_{\epsilon,{\xv}}} \newcommand{\dpy}{\text{differential privacy}} \newcommand{\edpy}{$\epsilon$--\text{differential privacy}} \newcommand{\eedpy}{$\epsilon$--edge \text{differential privacy}} \newcommand{\dpe}{\text{differentially private}} \newcommand{\edpe}{$\epsilon$--\text{differentially private}} \newcommand{\eedpe}{$\epsilon$--edge \text{differentially private}} \newcommand{\er}{Erdős-Rényi} \newcommand{\krein}{Kreĭn} % \newcommand{\grdpg}{\mathsf{gRDPG}} % \newcommand{\rdpg}{\mathsf{RDPG}} % \newcommand{\eflip}{{\textsf{edgeFlip}}} % \newcommand{\grdpg}{\text{gRDPG}} % \newcommand{\rdpg}{\text{RDPG}} \newcommand{\grdpg}{\mathsf{gRDPG}} \newcommand{\rdpg}{\mathsf{RDPG}} \newcommand{\eflip}{{\text{edgeFlip}}} \newcommand{\I}{{\mathbb I}} \renewcommand{\pa}[1]{\left(#1\right)} \renewcommand{\pb}[1]{\left\{#1\right\}} \renewcommand{\pc}[1]{\left[#1\right]} \renewcommand{\V}{\mathbb{V}} \renewcommand{\W}{\mathbb{W}} %%%%%%%%%%%%%%%%%%%%%%%%%%% \providecommand{\fd}{\frac 1d} % \renewcommand{\fpp}{{\frac 1p}} \providecommand{\pfac}{\f{p}{p-1}} \providecommand{\ipfac}{\f{p-1}{p}} \providecommand{\dbq}{\Delta b_{n,m,Q}\qty(\qty{\xvo})} \providecommand{\db}{\Delta 
b_{n,m}\qty(\qty{\xvo})} \providecommand{\bbv}{{{\mathbb{V}}}} \providecommand{\bbw}{{{\mathbb{W}}}} \providecommand{\md}{\textsf{MoM Dist}} \providecommand{\bF}{{\mathbf{F}}} \providecommand{\sub}{{\text{Sub}}} \providecommand{\samp}{\text{$\pa{\scr{S}}$}} \providecommand{\tp}{{2^{\f{p-1}{p}}}} %%%%%%%%%%%%%%%%%%%%%%%%%% \providecommand{\Xmn}{{\mathbb{X}_{n+m}}} \newcommand{\Dnmq}{\D[n+m, Q]} \newcommand{\Dnmh}{\D[n+m, \H]} \newcommand{\Dn}{\D[n]} \providecommand{\xvo}{\xv_0} \providecommand{\bn}[1][\null]{b^{#1}_{n}\pa{\pb{\xvo}}} \providecommand{\bnm}[1][\null]{b^{#1}_{n+m}\pa{\pb{\xvo}}} \providecommand{\bnq}[1][\null]{b^{#1}_{n,Q}\pa{\pb{\xvo}}} \providecommand{\bnmq}[1][\null]{b^{#1}_{n+m,Q}\pa{\pb{\xvo}}}\providecommand{\prq}{\pr_q} \providecommand{\dxvo}{{\delta_{\xvo}}} \providecommand{\sq}{S_q} \providecommand{\Sq}{\abs{S_q}} \providecommand{\no}{{n_o}} \providecommand{\mmdn}{\mmd\pa{\pr_n, \delta_{\xvo}}} \newcommand{\rqt}{\xi_{q}(t; n, Q)} \providecommand{\nq}{\f{n}{Q}} \providecommand{\Ot}{\Omega(t, n/Q)} \providecommand{\ut}[1]{U^{#1}} \providecommand{\vt}[1]{V^{#1}} \providecommand{\wt}[1]{W^{#1}} \providecommand{\but}[1]{\mathbb{U}^{#1}} \providecommand{\bvt}[1]{\mathbb{V}^{#1}} \providecommand{\bwt}[1]{\mathbb{W}^{#1}} \providecommand{\ball}[1]{B_{f\!, \rho}\pa{#1}} \newcommand*{\medcap}{\mathbin{\scalebox{0.75}{{\bigcap}}}}% \newcommand*{\medcup}{\mathbin{\scalebox{0.75}{{\bigcup}}}}% \providecommand{\dsf}{\mathsf{d}} \newcommand{\Dnh}{{\mathsf{D}_{n,\scr{H}}}} \newcommand{\Dph}{{\mathsf{D}_{\pr,\scr{H}}}} \newcommand{\D}[1][1={ },usedefault]{{\mathsf{D}_{#1}}} \newcommand{\Dnq}{{\mathsf{D}_{n, Q}}} \newcommand{\dnq}{{\mathsf{d}_{n, Q}}} \newcommand{\dn}{{\mathsf{d}_{n}}} \newcommand{\dnm}{{\mathsf{d}_{n-m}}} \newcommand{\dmn}{{\mathsf{d}_{n+m}}} \newcommand{\dx}{{\mathsf{d}_{\mathbb{X}}}} \providecommand{\med}{\text{median}} \providecommand{\median}{\text{median}} \providecommand{\Xnm}{{\mathbb{X}^*_{n-m}}} $$

Week-4

Math 183 • Statistical Methods • Spring 2025

Siddharth Vishwanath

Learning objectives

$$ % % % % % % % %%%%%%%%%%%%%%%%%%%%%%%%%%% % %%%%%%%%%%%%%%%%%%%%%%%%%% % %

% % \providecommand{}{p_{0}} \providecommand{}{p_{1}} \providecommand{}{p_{2}} \providecommand{}{p_{12}} \providecommand{1n}{p_{1n}} % % % % $$

  • Examples of discrete random variables
    • Uniform
    • Bernoulli
    • Binomial
    • Geometric
    • Poisson
  • Continuous random variables
    • Probability density functions
    • CDF and PDF via calculus
    • Expected value and variance
  • Examples of continuous random variables
    • Uniform
    • Exponential
    • Normal
    • Chi-squared
    • Central Limit Theorem

Random Variable

Random variable

A random variable is a variable which assumes values based on the outcome of a trial from a random phenomenon.

Tip

Think of a random variable as a placeholder for the different outcomes we can witness from a trial.

Support

The support of a random variable is the universe of all possible values a random variable can assume.

Anatomy of a random variable

Every random variable has:

  1. A mathematical symbol representing it
    • e.g., \(X, Y, Z\)
  2. A support, \(\text{supp}(X)\)

    • This determines the nature of the random variable
  3. A probability distribution \({\mathbb P}_X\).

    • This determines the probability of the random variable taking a specific set of values in its support
  4. Measures of central tendency and dispersion.

    • The measure of central tendency is called its expectation \({\mathbb E}(X)\)
    • The measure of dispersion is called its variance \({\text{Var}}(X)\)

Examples of
Discrete + Quantitative
Random Variables

Looking ahead

We will frequently encounter:

\[ X \sim \texttt{Name}(\theta) \]

Here:

  • \(X\) is a random variable which has its distribution specified by \(\texttt{Name}\) with parameter \(\theta\)
  • \(\texttt{Name}\) describes a particular family of distributions
    • It gives you a clue about what the support of \(X\) is
    • Also specifies a template for how probability is assigned to \(\text{supp}(X)\)
  • \(\theta\) denotes some parameter
    • It gives you the specific distribution from the family \(\texttt{Name}\) we’re interested in
    • It is often the (unknown) population parameter which we would like to learn/infer

Uniform Distribution

Uniform Random Variable

A uniform random variable \(X\) is a random variable wherein a finite number of values are equally likely to be observed. This is denoted by \(X \sim {\text{Unif}}\left\{a, b\right\}\)

Support

\[\text{supp}(X) = \left\{a, a+1, a+2, \dots, b-1, b\right\}\]

Probability Mass Function

\[ {\mathbb P}(X=x) = \frac 1{b-a+1} \quad \text{ for all } x \in \text{supp}(X) \]

Expected Value and Variance

\[ \begin{aligned} {\mathbb E}(X) = \frac{a+b}{2} \quad {\text{Var}}(X) = \frac{(b-a+1)^2-1}{12}\\ \end{aligned} \]

Examples

Die Roll

  • If \(X\) is the outcome from a roll of a fair die, then \(X \sim {\text{Unif}}\left\{1,6\right\}\)

Extension to non-quantitative support

  • I pick a student from the class at random to help answer this question. The support is now every possible student in this class. But, we can enumerate them (using some logic) as \(\left\{1, 2, 3, \dots, 357\right\}\). Then, the probability of choosing you at random follows… a \({\text{Unif}}\left\{1, 357\right\}\) distribution.

Bernoulli Random Variable

Bernoulli Random Variable

When a random variable \(X\) can only take two possible outcomes, it is referred to as a Bernoulli random variable. This is denoted by \(X \sim {\text{Ber}}(p)\)

\[\begin{aligned}X\end{aligned} = \begin{cases} 1 & \text{with probability } p\\ 0 & \text{with probability } 1-p \end{cases} \]

Examples

  1. \(X\) is the outcome of a coin flip
    • \(\text{supp}(X) = \left\{H, T\right\}\)
  2. \(X\) is the outcome of Hamlet’s dilemma
    • \(\text{supp}(X) = \left\{\text{to be}, \text{not to be}\right\}\)

Properties of \({\text{Ber}}(p)\)

Expectation and Variance

If \(X \sim {\text{Ber}}(p)\) then \(p_{X}(x) = p^x (1-p)^{1-x}\) and \[\begin{aligned} {\mathbb E}(X) &= (p \times 1) + \left((1-p) \times 0\right) = p\\\\ {\mathbb E}(X^2) &= (p \times 1^2) + \left((1-p) \times 0^2\right) = p\\\\ {\text{Var}}(X) &= {\mathbb E}(X^2) - {\mathbb E}(X)^2\\ &= p - p^2\\ &= p(1-p) \end{aligned}\]

Sums of Bernoulli Random Variables

  • Let \(X_1\) and \(X_2\) be two \(iid\) \({\text{Ber}}(p)\) random variables
    • \(iid\) = independent and identically distributed
    • Identical:
      • \({\mathbb P}(X_1 = 1) = {\mathbb P}(X_2=1) = p\)
    • Independent:
      • \({\mathbb P}\left(\left\{X_1 = i\right\} \cap \left\{X_2 = j\right\}\right) = {\mathbb P}({X_1 = i}) \times {\mathbb P}({X_2 = j})\)

  • Let \(Y = X_1 + X_2\). What is the probability distribution of \(Y\)?

Sums of Bernoulli Random Variables

  • Let \(X_1\) and \(X_2\) be two \(iid\) \({\text{Ber}}(p)\) random variables

  • Let \(Y = X_1 + X_2\). What is the probability distribution of \(Y\)?

  • The support is \(\text{supp}(Y) = \left\{0, 1, 2\right\}\)

\[\begin{aligned}{\mathbb P}(Y=0) &= {\mathbb P}\left(\left\{X_1=0\right\} \cap \left\{X_2=0\right\}\right)\\ &= {\mathbb P}(X_1=0) \times {\mathbb P}(X_2=0) = (1-p)^2\end{aligned}\]

\[\begin{aligned}{\mathbb P}(Y=2) &= {\mathbb P}\left(\left\{X_1=1\right\} \cap \left\{X_2=1\right\}\right)\\ &= {\mathbb P}(X_1=1) \times {\mathbb P}(X_2=1) = p^2\end{aligned}\]

\[\begin{aligned} {\mathbb P}(Y=1) &= {\mathbb P}\left(\left\{X_1=0 \cap X_2 = 1\right\} \cup \left\{X_1=1 \cap X_2 = 0\right\}\right)\\ &= {\mathbb P}(X_1=0) \times {\mathbb P}({X_2 = 1}) + {\mathbb P}({{X_1=1}}) \times {\mathbb P}({X_2 = 0})\\ &= (1-p) \times p + p \times (1-p)\\ & = 2p(1-p) \end{aligned}\]

Sums of Bernoulli Random Variables

  • Let \(X_1\) and \(X_2\) be two \(iid\) \({\text{Ber}}(p)\) random variables

  • Let \(Y = X_1 + X_2\). What is the probability distribution of \(Y\)?

  • The support is \(\text{supp}(Y) = \left\{0, 1, 2\right\}\)

PMF for \(Y\)

The PMF for \(Y\) is given by \[ p_{Y}(y) = \begin{cases} (1-p)^2 & y=0\\ 2p(1-p) & y=1\\ p^2 & y=2 \end{cases} \]

Binomial Random Variable

Binomial Random Variable

\(X \sim {\text{Bin}}(n, p)\) is called a Binomial random variable if it represents the # of successes in \(n\) \(iid\) trials from a \({\text{Ber}}(p)\) random variable, i.e., for \(Z_1, \dots, Z_n {\stackrel{iid}{\sim}}{\text{Ber}}(p)\) \[ X = \# \left\{Z_i=1 : 1 \le i \le n\right\} = \sum_{i=1}^n Z_i \]

Example

  • You toss a coin \(n\) times. If \(X\) denotes the number of heads, then \(X \sim {\text{Bin}}(n, {{\frac{1}{2}}})\).
  • You cross 5 freeways to get home from UCSD. If every freeway is jammed with probability \(p\), and \(X=\) #{traffic jams you encounter}, then \(X \sim {\text{Bin}}(5, p)\).

Binomial Random Variable

\(X \sim {\text{Bin}}(n, p)\)

Support

\(\text{supp}(X) = \left\{0, 1, 2, \dots, n\right\}\)

Probability Mass Function

\[ {\mathbb P}(X=k) = {n\choose{k}} p^k (1-p)^{n-k} \quad 0 \le k \le n \]

Expected Value and Variance

\[ \begin{aligned} {\mathbb E}(X) &= n \times p\\ {\text{Var}}(X) &= n \times p(1-p)\\ \end{aligned} \]

Geometric Random Variable

Gilman Parking Structure

You decide to drive to campus, and now you have to deal with the daunting task of finding parking. With probability \(p=0.2\) a parking spot in the Gilman parking structure is empty. Let \(X\) be the random variable indicating the # of parking spots you need to visit before you finally find one that is empty. What is the probability distribution of \(X\)?

  • \(\text{supp}(X) = \left\{1, 2, 3, \dots\right\}\)
  • \(p_{X}(1) = {\mathbb P}(X=1) = p\)
  • \(\begin{aligned} p_{X}(2) = {\mathbb P}(X=2) &= {\mathbb P}(\text{no spot on first} \cap \text{spot on second})\\ &= (1-p) \times p \end{aligned}\)
  • \(p_{X}(3) = {\mathbb P}(X=3) = (1-p)^2 \times p\)

Geometric Random Variable

Geometric Random Variable

A Geometric random variable \(X \sim {\text{Geo}}(p)\) represents the number of trials it takes to observe the first success

Support

\[\text{supp}(X) = \left\{1, 2, 3, \dots\right\}\]

Probability Mass Function

\[ {\mathbb P}(X=k) = (1-p)^{k-1} p \]

Expected Value and Variance

\[ \begin{aligned} {\mathbb E}(X) = \frac 1p \quad {\text{Var}}(X) = \frac{1-p}{p^2}\\ \end{aligned} \]

Examples

Tip

You keep tossing a coin. If \(X\) denotes the number of coin tosses required to get a \(H\), then \(X \sim {\text{Geo}}({{\frac{1}{2}}})\).

Tip

Each bulb in a factory is defective with probability \(p\). If \(X\) is the number of bulbs you need to test to find a defective bulb, then \(X \sim {\text{Geo}}(p)\)

Tip

You are applying for summer internships, and with probability \(p=0.2\) a firm you have applied to gets back to you with an interview. Let \(X\) be the random variable which represents the number of firms you need to apply to before you get a call back for an interview. Then \(X \sim {\text{Geo}}(0.2)\).

Poisson Distribution

Poisson Random Variable

Suppose some event is known to occur on an "average" \(\lambda\) times in every fixed unit of time. Then a Poisson random variable, \(X \sim {\text{Poi}}(\lambda)\), expresses the probability of a given number of events actually observed in this fixed unit of time.

Example

  • The Bass Pro Shops clerk tells you that if you go fishing in Torrey Pines, you can catch, on average, \(2\) fish every hour. So, you go fishing to Torrey Pines this Sunday. The actual number of fish you catch, \(X\), is a Poisson random variable \({\text{Poi}}(2)\).
  • An AT&T report says their users get, on average, 2.25 spam calls every day. Then \(X \sim {\text{Poi}}(2.25)\) is the random variable (placeholder) for the number of spam calls I get tomorrow.

Poisson Distribution

Support

\[\text{supp}(X) = \left\{0, 1, 2, 3, \dots\right\}\]

Probability Mass Function

\[ {\mathbb P}(X=k) = \frac{\lambda^k \cdot e^{-\lambda}}{k!} \]

Expected Value and Variance

\[ \begin{aligned} {\mathbb E}(X) = \lambda, \quad {\text{Var}}(X) = \lambda\\ \end{aligned} \]

Continuous + Quantitative
Random Variables

Continuous Random Variables

  • So far we have looked at random variables \(X\) with discrete quantitative support

Example

You get to pick an integer, \(X\), uniformly at random from \(1\) to \(10\). \(X \sim {\text{Unif}}\left\{1,10\right\}\), \[p_{X}(3) = {\mathbb P}(X=3) = 1/10.\]

  • Suppose you weren’t restricted to pick just an integer

Example

You get to pick any number, \(X\), uniformly at random from \(1\) to \(10\). Then

\(p_{X}(3) = {\mathbb P}(X=3) =\)\(0\)

PMF vs CDF

Example

You get to pick any number, \(X\), uniformly at random from \(1\) to \(10\). Then, \[p_{X}(3) = {\mathbb P}(X=3) = 0\]

  • Since it is uniform we can still (meaningfully) assign probability to the events like \[\left\{X \le 5.5\right\}\]

  • In particular, the CDF \(F_{X}(x)\) can still be computed \[F_{X}(5.5) = {\mathbb P}({X \le 5.5}) = {{\frac{1}{2}}}\]

Visualizing the Probability Distribution

Discrete Uniform \[{\text{Unif}}\left\{1, 10\right\}\]

Continuous Uniform \[{\text{Unif}}\left(1, 10\right)\]

Visualizing the CDF

\(F_{X}(2) = {\mathbb P}(X \le 2) = \frac{1}{9}\)

Visualizing the CDF

\(F_{X}(5) = {\mathbb P}(X \le 5) = \frac{4}{9}\)

Visualizing the CDF

\(F_{X}(5.5) = {\mathbb P}(X \le 5.5) = \frac{1}{2}\)

Visualizing the CDF

\(F_{X}(7) = {\mathbb P}(X \le 7) = \frac{6}{9}\)

Visualizing the CDF

\(F_{X}(10) = {\mathbb P}(X \le 10) = \frac{9}{9} = 1\)

CDF for \({\text{Unif}}(1, 10)\)

\[ F_{X}(x) = \begin{cases} 0 & \text{ if }x \le 1\\ \\ \frac{x-1}{9} & \text{ if }1 < x \le 10\\ \\ 1 & \text{ if } 10 < x \end{cases} \]

  • In other words:

\[ F_{X}(x) = \int_{1}^{x} f_{X}(x) dx = \int_{1}^{x} \frac{1}{9} dx \quad \quad \text{if } 1 < x \le 10 \]

where \(f_{X}(x) = 1/9\) is the probability density function

Probability Density Function

Definition: Probability Density Function (PDF)

For a continuous random variable \(X\), its probability density function (PDF), denoted by \(f_X(x)\), is a function that describes the likelihood of \(X\) taking on a specific set of values. The probability of the event \(\left\{X \le a\right\}\) is given by: \[ F_{X}(a) = P(X \leq a) = \int\limits_{-\infty}^a f_X(x) \, dx \]

Support

The support of a continuous random variable \(X\) is the set of values where the PDF is strictly positive, i.e., \[ \text{supp}(X) = \left\{x: f_{X}(x) \color{red}{>} 0\right\}. \]

Important Properties of the PDF

Properties of the Probability Density Function (PDF)

  1. Non-negativity: The PDF is always non-negative for all values of \(x\): \[ f_X(x) \geq 0 \]

  2. Area Under the Curve: The total area under the curve of the PDF over its entire range is equal to 1: \[ \int_{-\infty}^{\infty} f_X(x) \, dx = 1 \]

  3. Probabilities from Intervals: The probability that the random variable \(X\) lies in the interval \([a, b]\) is given by the area under the PDF curve from \(a\) to \(b\): \[ P(a \leq X \leq b) = \int_a^b f_X(x) \, dx \]

  4. No Point Probability: For continuous random variables, the probability at any specific point is zero: \[ P(X = x) = 0 \]

PDF vs CDF

The Fundamental Theorem of Calculus states that differentiation and integration are inverse operations. Specifically, if \(G(x)\) is an antiderivative of \(g(x)\) on an interval \([a, b]\), then:

\[ \int_a^b g(x) \, dx = G(b) - G(a) \]

Important

From the fundamental theorem of calculus: The rate of change (or the derivative) of the CDF with respect to \(x\) is the PDF: \[ f_X(x) = \frac{d}{dx} F_X(x) \]

Upper \(\alpha\)-quantile or Upper \(100\times \alpha\%\) percentile

For \(\alpha \in (0, 1)\), the upper \(\alpha\)-quantile of a distribution \(F\), is the value \(q_\alpha\) such that: \[ \begin{aligned} 1-F_X(q_\alpha) = {\mathbb P}(X > q_\alpha) = \alpha\quad\text{equivalently}\quad q_\alpha = F_X^{-1}(1-\alpha) \end{aligned} \]

#| standalone: true
#| viewerHeight: 600
#| components: viewer
#| layout: vertical

import numpy as np
import scipy
from scipy import stats
import scipy.stats as stats
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from shiny import App, render, ui
from matplotlib.patches import Patch

# Generate a random sample

# Define the UI
# UI: a sidebar holding a single "alpha" slider, with the plot area beside it.
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.sidebar(
            ui.input_slider(
                "alpha",  # input id, read in the server as input.alpha()
                "alpha",  # label shown next to the slider
                min = 1e-2,  # keep alpha strictly inside (0, 1)
                max = 1-1e-2,
                value = 0.5,
                step = 1e-2,
                ticks=True,
                animate=False
            )
        ),
        ui.output_plot("plots", height="500px")  # filled by the server's plots()
    )
)

# Define the server logic
# Server: render the Gamma(2, 1) PDF and CDF side by side, marking the
# upper alpha-quantile q_alpha chosen via the slider.
def server(input, output, session):
    @output
    @render.plot
    def plots():
        # Reference distribution and a dense grid over its effective support.
        dist = stats.gamma(a=2, scale=1)
        lo, hi = dist.ppf((1e-5, 1-1e-5))
        grid = np.linspace(lo-1, hi+1, 500)

        tail = input.alpha()
        q = dist.ppf(1-tail)  # upper alpha-quantile: P(X > q) = alpha

        fig, (pdf_ax, cdf_ax) = plt.subplots(1, 2, figsize=(12, 5))

        # Left panel: PDF with the upper-tail area beyond q shaded.
        pdf_ax.plot(grid, dist.pdf(grid), lw=1, label='')
        pdf_ax.set_ylabel('PDF')
        pdf_ax.fill_between(grid, 0, dist.pdf(grid), where=(grid > q), alpha=0.5)
        pdf_ax.set_title(rf'$P(X > q_\alpha)$={1-dist.cdf(q): .2f}')
        pdf_ax.axvline(q, color='red', linestyle='--')

        # Right panel: CDF with guide lines at (q, F(q)).
        cdf_ax.plot(grid, dist.cdf(grid), lw=1, label='')
        cdf_ax.set_ylabel('CDF')
        cdf_ax.axhline(dist.cdf(q), color='black', linestyle='--', lw=0.5)
        cdf_ax.axvline(q, color='red', linestyle='--')
        cdf_ax.set_title(rf'$q_\alpha$={q: .3f}')
        plt.tight_layout()
        return fig

# Create the Shiny app from the UI spec and server function defined above.
app = App(app_ui, server)
# Bare final expression: shinylive uses this object as the embedded app.
app

Summary

Discrete


  • \[F_{X}(x) = {\mathbb P}(X \le x) = \sum_{y \le x} p_{X}(y)\]

  • \[{\mathbb E}(X) = \sum\limits_{\text{supp}(X)} x \cdot p_{X}(x)\]

  • \[{\mathbb E}(X^2) = \sum\limits_{\text{supp}(X)} x^2 \cdot p_{X}(x)\]

  • \[{\text{Var}}(X) = {\mathbb E}(X^2) - {\mathbb E}(X)^2\]

Continuous


  • \[F_{X}(x) = {\mathbb P}(X \le x) = \int_{-\infty}^x f_{X}(x)dx\]

  • \[{\mathbb E}(X) = \int_{-\infty}^{\infty} x \cdot f_{X}(x) dx\]

  • \[{\mathbb E}(X^2) = \int_{-\infty}^{\infty} x^2 \cdot f_{X}(x) dx\]

  • \[{\text{Var}}(X) = {\mathbb E}(X^2) - {\mathbb E}(X)^2\]

Example

Note

Consider a continuous random variable \(X\) with the following probability density function (PDF): \[ f_X(x) = \begin{cases} Cx^2 & \text{for } 0 < x \le 1 \\ C(2-x) & \text{for } 1 < x \le 2 \\ 0 & \text{otherwise} \end{cases} \]

  1. Find the value of \(C\) so that \(f_{X}\) is a valid PDF
  2. Evaluate \(F_{X}(1.5)\)
  3. Calculate \({\mathbb E}(X)\)
  4. Calculate \({\text{Var}}(X)\)

Determining the value of \(C\)

For \(f_X(x)\) to be a valid probability density function, it must satisfy:

\(\int_{-\infty}^{\infty} f_X(x) \, dx = 1, \quad \text{i.e.,}\)

\[ \newcommand{\phn}{\phantom{\frac 11\!\!\!\!}} \begin{aligned} 1=& \int_0^1 Cx^2 dx + \int_1^2 C(2-x)dx\\ =& C \left[\phn\color{red}{\frac{x^3}{3}}\right]_0^1 + C\left[\phn\color{red}{2x - \frac{x^2}{2}}\right]_1^2 \\ =& C \left[\phn\color{red}{\frac{1^3}{3}- \frac{0^3}{3}}\right]_0^1 + C\left[\phn\color{red}{(4 - \frac{2^2}{2})-(2 - \frac{1^2}{2})}\right]_1^2 \\ =& \frac{C}{3} + \frac{C}{2} \\ \therefore C &= \frac{6}{5} = 1.2 \end{aligned}\]

Plot of \(f_{X}(x)\)

Computing \(F_{X}(1.5)\)

\[\begin{aligned} F_{X}(1.5) &= \int_{-\infty}^{1.5}f_{X}(x)dx\\ &= \int_{-\infty}^{0}0dx + \int_0^{1}Cx^2dx + \int_{1}^{1.5}C(2-x)dx\\ &= 0 + C\left(\color{red}{\frac{1}{3}-\frac{0}{3}}\right) + C\left(\color{red}{\left(2 \times 1.5 - \frac{1.5^2}{2}\right)-\left(2 - \frac{1^2}{2}\right)}\right)\\ &= C \times \color{red}{\frac{1}{3}} + C \times \color{red}{0.375}\\ &= 1.2 \times 0.708\\ &= 0.85 \end{aligned}\]

Computing \({\mathbb E}(X)\)

\[\begin{aligned} {\mathbb E}(X) &= \int_{-\infty}^{\infty}x \cdot f_{X}(x)dx\\ &= \int_{-\infty}^{0}0dx + \int_0^{1}Cx \cdot x^2dx + \int_{1}^{2}Cx(2-x)dx + \int_{2}^\infty 0dx\\ &= C\left[\color{red}{\frac{x^4}{4}}\right]_0^1 + C\left[\color{red}{\frac{2x^2}{2}-\frac{x^3}{3}}\right]_1^2\\ &= C\left[\color{red}{\frac{1}{4}}\right] + C\left[\color{red}{{4-1}-\frac{8-1}{3}}\right]\\ &= 1.2\times \left[\color{red}{\frac{1}{4}}\right] + 1.2 \times \left[\color{red}{\frac{2}{3}}\right]\\ \therefore {\mathbb E}(X) &= 1.1 \end{aligned}\]

Computing \({\text{Var}}(X)\)

\[\begin{aligned} {\mathbb E}(X^2) &= \int_{-\infty}^{\infty}x^2 \cdot f_{X}(x)dx\\ &= \int_0^{1}Cx^2 \cdot x^2dx + \int_{1}^{2}Cx^2(2-x)dx\\ &= C\left[\color{red}{\frac{x^5}{5}}\right]_0^1 + C\left[\color{red}{\frac{2x^3}{3}-\frac{x^4}{4}}\right]_1^2\\ &= C\left[\color{red}{\dots}\right] + C\left[\color{red}{\dots}\right]\\ \end{aligned}\]

Then

\[ {\text{Var}}(X) = {\mathbb E}(X^2)-{\mathbb E}(X)^2. \]

Examples of Continuous Random Variables

Uniform Distribution

Definition

\(X \sim {\text{Unif}}(a, b)\) is a continuous Uniform random variable with the PDF given by \[ f_{X}(x) = \begin{cases} \frac{1}{b-a} & \text{if } a \le x \le b\\ 0 & \text{otherwise } \end{cases} \]

Support

\[ \text{supp}(X) = [a, b] = \left\{x: a \le x \le b\right\} \]

CDF

\[\begin{aligned} F_X(x) = {\mathbb P}(X \le x) = \begin{cases} 0 & x \le a\\ \frac{x-a}{b-a} & a < x \le b\\ 1 & b < x \end{cases} \end{aligned}\]

Expected Value and Variance

\[ {\mathbb E}(X) = \frac{b+a}{2} \quad {\text{Var}}(X) = \frac{(b-a)^2}{12} \]

Illustration: \({\text{Unif}}(a, b)\)

#| standalone: true
#| viewerHeight: 600
#| components: viewer
#| layout: vertical

import numpy as np
import scipy
from scipy import stats
import scipy.stats as stats
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from shiny import App, render, ui
from matplotlib.patches import Patch

# Generate a random sample

# Define the UI
# UI: sidebar with sliders for alpha (tail probability) and b (upper end of
# the Unif(0, b) support), plus the plot area.
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.sidebar(
            ui.input_slider(
                "alpha",  # input id, read in the server as input.alpha()
                "alpha",  # label shown next to the slider
                min = 1e-2,  # keep alpha strictly inside (0, 1)
                max = 1-1e-2,
                value = 0.5,
                step = 1e-2,
                ticks=True,
                animate=False
            ),
            ui.input_slider(
                "b",  # input id, read in the server as input.b()
                "b",  # label shown next to the slider
                min = 0.25,
                max = 2.5,
                value = 1.5,
                step = 0.25,
                ticks=True,
                animate=False
            ),
        ),
        ui.output_plot("plots", height="500px")  # filled by the server's plots()
    )
)

# Define the server logic
# Server: draw the Unif(0, b) PDF/CDF next to the standard Unif(0, 1)
# reference (black dashed), marking the upper alpha-quantile q_alpha in red.
def server(input, output, session):
    @output
    @render.plot
    def plots():
        width = input.b()
        ref = stats.uniform()                     # Unif(0, 1) reference
        dist = stats.uniform(loc=0, scale=width)  # Unif(0, b)
        lo, hi = dist.ppf((1e-5, 1-1e-5))
        grid = np.linspace(lo-1, hi+1, 500)

        tail = input.alpha()
        q = dist.ppf(1-tail)  # upper alpha-quantile: P(X > q) = alpha

        fig, (pdf_ax, cdf_ax) = plt.subplots(1, 2, figsize=(12, 5))

        # Left panel: PDF with the upper-tail area beyond q shaded.
        pdf_ax.plot(grid, dist.pdf(grid), lw=1, label='')
        pdf_ax.plot(grid, ref.pdf(grid), lw=1, label='', color='black', linestyle='--')
        pdf_ax.set_ylabel('PDF')
        pdf_ax.fill_between(grid, 0, dist.pdf(grid), where=(grid > q), alpha=0.5)
        pdf_ax.set_title(rf'$P(X > q_\alpha)$={1-dist.cdf(q): .2f}')
        pdf_ax.axvline(q, color='red', linestyle='--')
        pdf_ax.set_xlim(-1, 5)
        pdf_ax.set_ylim(0, 4)

        # Right panel: CDF with guide lines at (q, F(q)).
        cdf_ax.plot(grid, dist.cdf(grid), lw=1, label='')
        cdf_ax.plot(grid, ref.cdf(grid), lw=1, label='', color='black', linestyle='--')
        cdf_ax.set_ylabel('CDF')
        cdf_ax.axhline(dist.cdf(q), color='black', linestyle='--', lw=0.5)
        cdf_ax.axvline(q, color='red', linestyle='--')
        cdf_ax.set_title(rf'$q_\alpha$={q: .3f}')
        cdf_ax.set_xlim(-1, 5)
        plt.tight_layout()
        return fig

# Create the Shiny app from the UI spec and server function defined above.
app = App(app_ui, server)
# Bare final expression: shinylive uses this object as the embedded app.
app

Example

Example

If a manufacturer claims a battery has a life of 50 to 70 hours, and any duration within this range is equally likely, then the battery life is a continuous uniform random variable \(X \sim {\text{Unif}}(50, 70)\).

Exponential Distribution

Definition

\(X \sim {\text{Exp}}(\lambda)\) is a continuous exponential random variable with the PDF given by: \[ f_X(x) = \begin{cases} \lambda e^{-\lambda x} & \text{if } x \ge 0\\ 0 & \text{otherwise} \end{cases} \] Where \(\lambda > 0\) is called the rate parameter.

Support

\[ \text{supp}(X) = [0, \infty) = \{x: x \ge 0\} \]

CDF

\[\begin{aligned} F_X(x) = {\mathbb P}(X \le x) = \begin{cases} 1 - e^{-\lambda x} & x \ge 0\\ 0 & x < 0 \end{cases} \end{aligned}\]

Expected Value and Variance

\[ {\mathbb E}(X) = \frac{1}{\lambda} \quad {\text{Var}}(X) = \frac{1}{\lambda^2} \]

Illustration: \({\text{Exp}}(\lambda)\)

#| standalone: true
#| viewerHeight: 600
#| components: viewer
#| layout: vertical

import numpy as np
import scipy
from scipy import stats
import scipy.stats as stats
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from shiny import App, render, ui
from matplotlib.patches import Patch

# Generate a random sample

# Define the UI
# UI: a sidebar with two sliders (upper-tail probability alpha and the
# exponential rate lambda) next to the rendered PDF/CDF figure.
_alpha_slider = ui.input_slider(
    "alpha",
    "alpha",
    min=1e-2,
    max=1 - 1e-2,
    value=0.5,
    step=1e-2,
    ticks=True,
    animate=False,
)
_rate_slider = ui.input_slider(
    "lam",
    "lambda",
    min=0.25,
    max=2.5,
    value=1.5,
    step=0.25,
    ticks=True,
    animate=False,
)
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.sidebar(_alpha_slider, _rate_slider),
        ui.output_plot("plots", height="500px"),
    )
)

# Define the server logic
def server(input, output, session):
    """Server logic: redraw the PDF/CDF panels whenever a slider moves."""

    @output
    @render.plot
    def plots():
        # Current slider values.
        rate = input.lam()
        tail = input.alpha()

        # X ~ Exp(rate); scipy parameterizes by scale = 1/rate.
        # Z ~ Exp(1) is the dashed reference curve.
        Z = stats.expon()
        X = stats.expon(scale=1 / rate)

        # Grid covering essentially all of X's mass, padded by 1 each side.
        lo, hi = X.ppf((1e-5, 1 - 1e-5))
        grid = np.linspace(lo - 1, hi + 1, 500)

        # Upper-tail quantile: P(X > q) = tail.
        q = X.ppf(1 - tail)

        fig, (ax_pdf, ax_cdf) = plt.subplots(1, 2, figsize=(12, 5))

        # Left panel: densities, with the mass beyond q shaded.
        ax_pdf.plot(grid, X.pdf(grid), lw=1, label='')
        ax_pdf.plot(grid, Z.pdf(grid), lw=1, label='', color='black', linestyle='--')
        ax_pdf.set_ylabel('PDF')
        ax_pdf.fill_between(grid, 0, X.pdf(grid), where=(grid > q), alpha=0.5)
        ax_pdf.set_title(rf'$P(X > q_\alpha)$={1 - X.cdf(q): .2f}')
        ax_pdf.axvline(q, color='red', linestyle='--')
        ax_pdf.set_xlim(-1, 5)
        ax_pdf.set_ylim(0, 4)

        # Right panel: CDFs with crosshairs marking the quantile.
        ax_cdf.plot(grid, X.cdf(grid), lw=1, label='')
        ax_cdf.plot(grid, Z.cdf(grid), lw=1, label='', color='black', linestyle='--')
        ax_cdf.set_ylabel('CDF')
        ax_cdf.axhline(X.cdf(q), color='black', linestyle='--', lw=0.5)
        ax_cdf.axvline(q, color='red', linestyle='--')
        ax_cdf.set_title(rf'$q_\alpha$={q: .3f}')
        ax_cdf.set_xlim(-1, 5)
        plt.tight_layout()
        return fig

# Create the Shiny app
# Bind the UI definition to the server callback. The trailing bare
# expression is what shinylive renders when this cell is executed.
app = App(app_ui, server)
app

Example

Example

Consider a car rental shop where cars are rented at a constant rate of 3 cars per hour. The time between successive car rentals can be modeled as an exponential random variable \(X \sim {\text{Exp}}(3)\).

Normal Distribution

Definition

A random variable \(X \sim \mathcal{N}(\mu, \sigma^2)\) is said to follow a normal distribution where \(\mu\) is the mean and \(\sigma\) is the standard deviation. Its probability density function (PDF) is given by: \[ f_X(x) = \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2\right) \]

Support

\[ \text{supp}(X) = {\mathbb R}= (-\infty, \infty) \]

Expected Value and Variance

\[ {\mathbb E}(X) = \mu \quad {\text{Var}}(X) = \sigma^2 \]

CDF

For the standard normal distribution \(Z \sim \mathcal{N}(0,1)\), the CDF is \[ F_Z(x) = {\mathbb P}(Z \le x) = \Phi(x) \]

Illustration: \(N(\mu, \sigma^2)\)

#| standalone: true
#| viewerHeight: 600
#| components: viewer
#| layout: vertical

import numpy as np
import scipy
from scipy import stats
import scipy.stats as stats
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from shiny import App, render, ui
from matplotlib.patches import Patch

# Generate a random sample

# Define the UI
# UI: a sidebar with three sliders (upper-tail probability alpha, mean mu,
# standard deviation sigma) next to the rendered PDF/CDF figure.
_alpha_slider = ui.input_slider(
    "alpha",
    "alpha",
    min=1e-2,
    max=1 - 1e-2,
    value=0.5,
    step=1e-2,
    ticks=True,
    animate=False,
)
_mu_slider = ui.input_slider(
    "mu",
    "mu",
    min=-1.0,
    max=1.0,
    value=0.0,
    step=0.25,
    ticks=True,
    animate=False,
)
_sigma_slider = ui.input_slider(
    "sigma",
    "sigma",
    min=0.25,
    max=2.0,
    value=1.5,
    step=0.25,
    ticks=True,
    animate=False,
)
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.sidebar(_alpha_slider, _mu_slider, _sigma_slider),
        ui.output_plot("plots", height="500px"),
    )
)

# Define the server logic
def server(input, output, session):
    """Server logic: redraw the PDF/CDF panels whenever a slider moves."""

    @output
    @render.plot
    def plots():
        # Current slider values.
        mean = input.mu()
        sd = input.sigma()
        tail = input.alpha()

        # X ~ N(mean, sd^2); Z ~ N(0, 1) is the dashed reference curve.
        Z = stats.norm()
        X = stats.norm(loc=mean, scale=sd)

        # Grid covering essentially all of X's mass, padded by 1 each side.
        lo, hi = X.ppf((1e-5, 1 - 1e-5))
        grid = np.linspace(lo - 1, hi + 1, 500)

        # Upper-tail quantile: P(X > q) = tail.
        q = X.ppf(1 - tail)

        fig, (ax_pdf, ax_cdf) = plt.subplots(1, 2, figsize=(12, 5))

        # Left panel: densities, with the mass beyond q shaded.
        ax_pdf.plot(grid, X.pdf(grid), lw=1, label='')
        ax_pdf.plot(grid, Z.pdf(grid), lw=1, label='', color='black', linestyle='--')
        ax_pdf.set_ylabel('PDF')
        ax_pdf.fill_between(grid, 0, X.pdf(grid), where=(grid > q), alpha=0.5)
        ax_pdf.set_title(rf'$P(X > q_\alpha)$={1 - X.cdf(q): .2f}')
        ax_pdf.axvline(q, color='red', linestyle='--')
        ax_pdf.set_xlim(-5, 5)
        ax_pdf.set_ylim(0, 2)

        # Right panel: CDFs with crosshairs marking the quantile.
        ax_cdf.plot(grid, X.cdf(grid), lw=1, label='')
        ax_cdf.plot(grid, Z.cdf(grid), lw=1, label='', color='black', linestyle='--')
        ax_cdf.set_ylabel('CDF')
        ax_cdf.axhline(X.cdf(q), color='black', linestyle='--', lw=0.5)
        ax_cdf.axvline(q, color='red', linestyle='--')
        ax_cdf.set_title(rf'$q_\alpha$={q: .3f}')
        ax_cdf.set_xlim(-5, 5)
        plt.tight_layout()
        return fig

# Create the Shiny app
# Bind the UI definition to the server callback. The trailing bare
# expression is what shinylive renders when this cell is executed.
app = App(app_ui, server)
app

Example

Example

The heights of adult men in a certain population are normally distributed with a mean of 175 cm and a standard deviation of 7 cm. If a man from this population is selected at random, his height can be modeled as a random variable \(X \sim \mathcal{N}(175, 49)\).

Chi-Squared Distribution

Definition

If \(Z_1, Z_2, \ldots, Z_k\) are independent standard normal random variables, then the sum of their squares: \[ X = Z_1^2 + Z_2^2 + \ldots + Z_k^2 \] is distributed as a chi-squared distribution with \(k\) degrees of freedom, denoted as \[X \sim {\chi^2}(k). \] The probability density function (PDF) is given by: \[ f_X(x) = \frac{1}{2^{k/2} \Gamma(k/2)} x^{k/2 - 1} e^{-x/2} \]

Support

\[ \text{supp}(X) = {\mathbb R}_+ = [0, \infty) = \{x: x \ge 0\} \]

Expected Value and Variance

\[ {\mathbb E}(X) = k \quad {\text{Var}}(X) = 2k \]

Illustration: \({\chi^2}(k)\)

#| standalone: true
#| viewerHeight: 600
#| components: viewer
#| layout: vertical

import numpy as np
import scipy
from scipy import stats
import scipy.stats as stats
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from shiny import App, render, ui
from matplotlib.patches import Patch

# Generate a random sample

# Define the UI
# UI: a sidebar with two sliders (upper-tail probability alpha and the
# degrees of freedom k) next to the rendered PDF/CDF figure.
_alpha_slider = ui.input_slider(
    "alpha",
    "alpha",
    min=1e-2,
    max=1 - 1e-2,
    value=0.5,
    step=1e-2,
    ticks=True,
    animate=False,
)
_df_slider = ui.input_slider(
    "k",
    "k",
    min=1,
    max=7,
    value=3,
    step=1,
    ticks=True,
    animate=False,
)
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.sidebar(_alpha_slider, _df_slider),
        ui.output_plot("plots", height="500px"),
    )
)

# Define the server logic
def server(input, output, session):
    """Server logic: redraw the PDF/CDF panels whenever a slider moves."""

    @output
    @render.plot
    def plots():
        # Current slider values.
        df = input.k()
        tail = input.alpha()

        # X ~ chi^2(df); Z ~ chi^2(1) is the dashed reference curve.
        Z = stats.chi2(df=1)
        X = stats.chi2(df=df)

        # Grid covering essentially all of X's mass, padded by 1 each side.
        lo, hi = X.ppf((1e-5, 1 - 1e-5))
        grid = np.linspace(lo - 1, hi + 1, 500)

        # Upper-tail quantile: P(X > q) = tail.
        q = X.ppf(1 - tail)

        fig, (ax_pdf, ax_cdf) = plt.subplots(1, 2, figsize=(12, 5))

        # Left panel: densities, with the mass beyond q shaded.
        ax_pdf.plot(grid, X.pdf(grid), lw=1, label='')
        ax_pdf.plot(grid, Z.pdf(grid), lw=1, label='', color='black', linestyle='--')
        ax_pdf.set_ylabel('PDF')
        ax_pdf.fill_between(grid, 0, X.pdf(grid), where=(grid > q), alpha=0.5)
        ax_pdf.set_title(rf'$P(X > q_\alpha)$={1 - X.cdf(q): .2f}')
        ax_pdf.axvline(q, color='red', linestyle='--')
        ax_pdf.set_xlim(-1, 15)
        ax_pdf.set_ylim(0, 1)

        # Right panel: CDFs with crosshairs marking the quantile.
        ax_cdf.plot(grid, X.cdf(grid), lw=1, label='')
        ax_cdf.plot(grid, Z.cdf(grid), lw=1, label='', color='black', linestyle='--')
        ax_cdf.set_ylabel('CDF')
        ax_cdf.axhline(X.cdf(q), color='black', linestyle='--', lw=0.5)
        ax_cdf.axvline(q, color='red', linestyle='--')
        ax_cdf.set_title(rf'$q_\alpha$={q: .3f}')
        ax_cdf.set_xlim(-1, 15)
        plt.tight_layout()
        return fig

# Create the Shiny app
# Bind the UI definition to the server callback. The trailing bare
# expression is what shinylive renders when this cell is executed.
app = App(app_ui, server)
app

Example

If \(X_1, X_2, \dots, X_n\) is a random sample from a normal distribution \(\mathcal{N}(\mu, \sigma^2)\), then the sample variance \[{{\text{Var}}(X_1, X_2, \dots, X_n)} = \frac{1}{(n-1)}\sum_{i=1}^n(X_i - \overline X)^2\] follows the rescaled \(\frac{\sigma^2}{(n-1)} \cdot {\chi^2}(n-1)\) distribution.