diff --git a/inputs/lecture_10.tex b/inputs/lecture_10.tex index 7abe793..2a6f4ea 100644 --- a/inputs/lecture_10.tex +++ b/inputs/lecture_10.tex @@ -175,8 +175,8 @@ However, Fourier analysis is not only useful for continuous probability density \end{IEEEeqnarray*} \end{refproof} \begin{theorem}[Bochner's theorem]\label{bochnersthm} - The converse to \autoref{thm:lec_10thm5} holds, i.e.~ - any $\phi: \R \to \C$ satisfying (a) and (b) of \autoref{thm:lec_10thm5} + The converse to \autoref{thm:lec_10thm5} holds, i.e.~any + $\phi: \R \to \C$ satisfying (a) and (b) of \autoref{thm:lec_10thm5} must be the Fourier transform of a probability measure $\bP$ on $(\R, \cB(\R))$. \end{theorem} diff --git a/inputs/lecture_11.tex b/inputs/lecture_11.tex new file mode 100644 index 0000000..67c1752 --- /dev/null +++ b/inputs/lecture_11.tex @@ -0,0 +1,130 @@ +\subsection{The central limit theorem} + +For $X_1, X_2,\ldots$ i.i.d.~we were looking +at $S_n \coloneqq \sum_{i=1}^n X_i$. +Then the LLN basically states that $S_n$ can be approximated by $n \bE[X_1]$. +\begin{question} + What is the error of this approximation? +\end{question} + +We set $\mu\coloneqq \bE[X_1]$ and $\sigma^2 \coloneqq \Var(X_1) \in (0,\infty)$. +We know that $\bE[S_n] = n \mu$ and $\Var(S_n) = n\sigma^2$. + +The central limit theorem basically states that the distribution of $S_n$ +can be approximated by a normal distribution with mean $n \mu$ and +variance $n \sigma^2$, +i.e.~$S_n \approx n \mu + \sigma \sqrt{n} N$ for $N \sim \cN(0,1)$, +where $\approx$ is to be made precise. + +For intuition, watch \url{https://3blue1brown.com/lessons/clt}. + +\begin{example} + We throw a fair die $n = 100$ times and denote the sum of the faces + by $S_n \coloneqq X_1 + \ldots + X_n$, where $X_1,\ldots, X_n$ + are i.i.d.~and uniformly distributed on $\{1,\ldots,6\}$. + Then $\bE[S_n] = 350$ and $\sqrt{\Var(S_n)} = \sigma \sqrt{n} \approx 17.08$.
+ \todo{Missing pictures} +\end{example} + +\begin{question} + Why do statisticians care about $\sigma$ instead of $\sigma^2$? +\end{question} +By definition, $\Var(X) = \bE[(X- \bE[X])^2]$, hence $\sqrt{\Var(X)}$ +can be interpreted as a distance. +One could also define $\Var(X)$ to be $\bE[|X - \bE[X]|]$, but this is not +well behaved. + + +\begin{example} + Let $X_1,\ldots,X_n$ be i.i.d.~and $X_1\sim \Exp(1)$. + We know that for $n \in \N$, $\bE[S_n] = n$ + and $\sqrt{\Var(S_n)} = \sqrt{n}$. + For $n = 100, 300, 500$, we get the following picture: + \todo{Missing picture} +\end{example} + +In order to make things nicer, we do the following: +\begin{enumerate}[1.] + \item center: $S_n - \bE[S_n]$, + \item normalize: $\frac{S_n - \bE[S_n]}{\sqrt{\Var(S_n)} }$. +\end{enumerate} +Then $\bE[\frac{S_n - \bE[S_n]}{\sqrt{\Var(S_n)}}] = 0$ +and $\Var(\frac{S_n - \bE[S_n]}{\sqrt{\Var(S_n)}}) = 1$. + +\begin{theorem}[Central limit theorem, 1920s, Lindeberg and L\'evy]\label{clt} + Let $X_1,X_2,\ldots$ be i.i.d.~random variables + with $\bE[X_1] = \mu$ and $\Var(X_1) = \sigma^2 \in (0, \infty)$. + + Let $S_n \coloneqq \sum_{i=1}^n X_i$. + Then + \[ + \frac{S_n - n \mu}{\sigma \sqrt{n} } \xrightarrow{\text{dist.}} \cN(0,1), + \] + i.e.~$\forall x \in \R:$ + \[ + \lim_{n \to \infty} \bP\left[\frac{S_n - n \mu}{\sigma \sqrt{n} } \le x\right] = \Phi(x) = \int_{-\infty}^x \frac{1}{\sqrt{2 \pi} } e^{\frac{-t^2}{2}}\,dt. + \] +\end{theorem} + +There exists a special case of this theorem, which was proved earlier: +\begin{theorem}[de Moivre (1730, $p = 0.5$), Laplace (1812, general $p$)] + \label{preclt} + Let $S_n \sim \Bin(n,p)$, where $p \in (0,1)$ is constant. + Then, for all $x \in \R$: + \[ + \lim_{n \to \infty} \bP\left[ \frac{ S_n - np}{\sqrt{n p(1-p)}} \le x\right] = \Phi(x). + \] +\end{theorem} +\begin{proof} + Let $X_1, X_2,\ldots$ be i.i.d.~with $X_1 \sim \Ber(p)$. + Then $\bE[X_1] = p$ and $\Var(X_1) = p(1-p)$.
+ Furthermore $\sum_{i=1}^n X_i \sim \Bin(n,p)$, + and the special case follows from \autoref{clt}. +\end{proof} +\autoref{preclt} is a useful tool for approximating the binomial distribution with the normal distribution. +If $S_n \sim \Bin(n,p)$ and $[a,b] \subseteq \R$, we have +\[\bP[a \le S_n \le b] = \bP\left[\frac{a - np}{\sqrt{np(1-p)}} \le \frac{S_n -np}{\sqrt{n p (1-p)}} \le \frac{b - np}{\sqrt{n p (1-p)} }\right] \approx \Phi(b') - \Phi(a'),\] where $a' \coloneqq \frac{a - np}{\sqrt{np(1-p)}}$ and $b' \coloneqq \frac{b - np}{\sqrt{np(1-p)}}$. + +\begin{example} + We consider $n=40$ independent Bernoulli trials with success probability $p = \frac{1}{2}$ and denote the number of successes by $S$. + Then $0.9597 = \bP[S \le 25] \approx \Phi\left(\frac{5}{\sqrt{10}}\right) \approx 0.9431$. + + However, $S$ takes only integer values, which means $\bP[S \le 25] = \bP[S < 26]$. + With this in mind, a better approximation is + \[ + \bP[S \le 25] = \bP[S \le 25.5] \approx \Phi\left( \frac{5.5}{\sqrt{10} } \right) \approx 0.9590. + \] +\end{example} + +\begin{example} + Consider a particle that starts at $0$ and moves on the lattice $\Z$. + In every step, it takes a step $+ 1$ with probability $\frac{1}{2}$ + or $-1$ with probability $\frac{1}{2}$. + + More formally: Let $X_1,X_2,\ldots$ be i.i.d.~with $\bP[X_1=1] = \bP[X_1=-1] = \frac{1}{2}$ and consider $S_n \coloneqq \sum_{i=1}^n X_i$. + + Then \autoref{clt} states that $S_n \approx \cN(0,n)$. +\end{example} + +\begin{example} + Consider an election with two candidates $A$ and $B$. + The relative number of votes for $A$ is $p \in (0,1)$ (constant, but unknown). + How many ballots do we need to count to make sure that the probability of erring more than $1\%$ is not bigger than $5\%$? + + Each ballot is a vote for $A$ with probability $p$. + We have $S_n \sim \Bin(n,p)$ and we want to find $n$ such that + $\bP[|S_n - np| > 0.01 n] \le 0.05$, i.e.~$\bP[|S_n - np| \le 0.01 n] \ge 0.95$.
+ We have that + \begin{IEEEeqnarray*}{rCl} + &&\bP[|S_n - np| \le 0.01n] \\ + &=& \bP[ -0.01 n \le S_n - np \le 0.01n]\\ + &=& \bP\left[-\frac{0.01 n}{\sqrt{n p (1-p)} } \le \frac{S_n - np}{\sqrt{n p (1-p)} } \le \frac{0.01 n}{\sqrt{n p (1-p)}}\right]\\ + &\approx& \Phi\left(0.01 \sqrt{\frac{n}{p(1-p)}}\right) - \Phi\left(-0.01 \sqrt{\frac{n}{p(1-p)}}\right)\\ + &=& 2\Phi\left(0.01 \sqrt{\frac{n}{p(1-p)}}\right) - 1. + \end{IEEEeqnarray*} + Hence, we want $\Phi\left(0.01 \sqrt{\frac{n}{p(1-p)}}\right) \approx \frac{1.95}{2} = 0.975$, + i.e.~$n = (1.96)^2 \cdot 100^2 \cdot p (1-p)$, since $\Phi(1.96) \approx 0.975$. + We have $p\cdot (1-p) \le \frac{1}{4}$, + thus $n = (1.96)^2 \cdot 100^2 \cdot \frac{1}{4} = 9604$ suffices. +\end{example} + diff --git a/probability_theory.tex b/probability_theory.tex index 318c3b8..3a5eefc 100644 --- a/probability_theory.tex +++ b/probability_theory.tex @@ -34,6 +34,7 @@ \input{inputs/lecture_8.tex} \input{inputs/lecture_9.tex} \input{inputs/lecture_10.tex} +\input{inputs/lecture_11.tex} \cleardoublepage