\lecture{13}{2023-05}{} %The difficult part is to show \yaref{levycontinuity}. %This is the last lecture, where we will deal with independent random variables. We have seen that if $X_1, X_2,\ldots$ are i.i.d.~with $ \mu = \bE[X_1]$, $\sigma^2 = \Var(X_1)$, then $\frac{\sum_{i=1}^{n} (X_i - \mu)}{\sigma \sqrt{n} } \xrightarrow{(d)} \cN(0,1)$. \begin{question} What happens if $X_1, X_2,\ldots$ are independent, but not identically distributed? Do we still have a CLT? \end{question} \begin{theorem}[Lindeberg CLT] \yalabel{Lindeberg's CLT}{Lindeberg CLT}{lindebergclt} Assume $X_1, X_2, \ldots$ are independent (but not necessarily identically distributed) with $\mu_i = \bE[X_i] < \infty$ and $\sigma_i^2 = \Var(X_i) < \infty$. Let $S_n \coloneqq \sqrt{\sum_{i=1}^{n} \sigma_i^2}$ and assume that \[\lim_{n \to \infty} \frac{1}{S_n^2} \sum_{i=1}^{n} \bE\left[ (X_i - \mu_i)^2 \One_{|X_i - \mu_i| > \epsilon S_n} \right] = 0\] for all $\epsilon > 0$ (\vocab{Lindeberg condition}\footnote{``The truncated variance is negligible compared to the variance.''}). Then the CLT holds, i.e.~ \[ \frac{\sum_{i=1}^n (X_i - \mu_i)}{S_n} \xrightarrow{(d)} \cN(0,1). \] \end{theorem} \begin{theorem}[Lyapunov condition] \yalabel{Lyapunov's CLT}{Lyapunov CLT}{lyapunovclt} Let $X_1, X_2,\ldots$ be independent, $\mu_i = \bE[X_i] < \infty$, $\sigma_i^2 = \Var(X_i) < \infty$ and $S_n \coloneqq \sqrt{\sum_{i=1}^n \sigma_i^2}$. Assume that, for some $\delta > 0$, \[ \lim_{n \to \infty} \frac{1}{S_n^{2+\delta}} \sum_{i=1}^{n} \bE\left[|X_i - \mu_i|^{2 + \delta}\right] = 0 \] (\vocab{Lyapunov condition}). Then the CLT holds. \end{theorem} \begin{remark} The Lyapunov condition implies the Lindeberg condition (exercise). \end{remark} We will not prove \yaref{lindebergclt} or \yaref{lyapunovclt} in this lecture. However, they are quite important. We will now sketch the proof of \yaref{levycontinuity}; details can be found in the notes.\notes \begin{definition} Let $(X_n)_n$ be a sequence of random variables. 
The distribution of $(X_n)_n$ is called \vocab[Distribution!tight]{tight} (dt.\ ``straff''), if \[ \lim_{a \to \infty} \sup_{n \in \N} \bP[|X_n| > a] = 0. \] \end{definition} \begin{example}+[Exercise 8.1] \todo{Copy} \end{example} A generalized version of \yaref{levycontinuity} is the following: \begin{theorem}[A generalized version of \yaref{levycontinuity}] \label{genlevycontinuity} Suppose we have random variables $(X_n)_n$ such that $\bE[e^{\i t X_n}] \xrightarrow{n \to \infty} \phi(t)$ for all $t \in \R$ for some function $\phi$ on $\R$. Then the following are equivalent: \begin{enumerate}[(a)] \item The distribution of $(X_n)_n$ is tight. \item $X_n \xrightarrow{(d)} X$ for some real-valued random variable $X$. \item $\phi$ is the characteristic function of some real-valued random variable $X$. \item $\phi$ is continuous on all of $\R$. \item $\phi$ is continuous at $0$. \end{enumerate} \end{theorem} \todo{Proof of \yaref{genlevycontinuity} (Exercise 8.2)} \begin{example} Let $Z \sim \cN(0,1)$ and $X_n \coloneqq n Z$. We have $\phi_{X_n}(t) = \bE[e^{\i t X_n}] = e^{-\frac{1}{2} t^2 n^2} \xrightarrow{n \to \infty} \One_{\{t = 0\} }$. The limit $\One_{\{t = 0\}}$ is not continuous at $0$. By \yaref{genlevycontinuity}, $X_n$ cannot converge in distribution to a real-valued random variable. Exercise: $X_n \xrightarrow{(d)} \overline{X}$, where $\bP[\overline{X} = \infty] = \frac{1}{2} = \bP[\overline{X} = -\infty]$. Similar examples are $\mu_n \coloneqq \delta_n$ and $\mu_n \coloneqq \frac{1}{2} \delta_n + \frac{1}{2} \delta_{-n}$. \end{example} \begin{example} Suppose that $X_1, X_2,\ldots$ are i.i.d.~with $\bE[X_1] = 0$. Let $\sigma^2 \coloneqq \Var(X_i)$. 
Then the distribution of $\frac{S_n}{\sqrt{n}}$, where $S_n \coloneqq X_1 + \ldots + X_n$, is tight (and hence so is that of $\frac{S_n}{\sigma \sqrt{n}}$): \begin{IEEEeqnarray*}{rCl} \bE\left[ \left( \frac{S_n}{\sqrt{n}} \right)^2 \right] &=& \frac{1}{n} \bE[ (X_1+ \ldots + X_n)^2]\\ &=& \sigma^2 \end{IEEEeqnarray*} For $a > 0$, by \yaref{thm:chebyshev}, we have \[ \bP\left[ \left| \frac{S_n}{\sqrt{n}} \right| > a \right] \leq \frac{\sigma^2}{a^2} \xrightarrow{a \to \infty} 0, \] where the bound does not depend on $n$, verifying \yaref{genlevycontinuity}. \end{example} \begin{example} Suppose $C$ is a random variable which is \vocab[Cauchy distribution]{Cauchy distributed}, i.e.~$C$ has probability density function $f_C(x) = \frac{1}{\pi} \frac{1}{1 + x^2}$. \begin{figure}[H] \centering \begin{tikzpicture} \begin{axis}[samples=100, smooth] \addplot[] { (1/3.14159265358979323846) * (1 / ( 1 + x * x))}; \end{axis} \end{tikzpicture} \caption{Probability density function of $C$} \end{figure} We know that $\bE[|C|] = \infty$. We have $\phi_C(t) = \bE[e^{\i t C}] = e^{-|t|}$. Suppose $C_1, C_2, \ldots, C_n$ are i.i.d.~Cauchy distributed and let $S_n \coloneqq C_1 + \ldots + C_n$. Exercise: $\phi_{\frac{S_n}{n}}(t) = e^{-|t|} = \phi_{C_1}(t)$, thus $\frac{S_n}{n} \sim C$. \end{example} We will prove \yaref{levycontinuity} assuming \yaref{lec10_thm1}. \yaref{lec10_thm1} will be shown in the notes.\notes We will need the following: \begin{lemma} \label{lec13_lem1} Given a sequence $(F_n)_n$ of probability distribution functions, there is a subsequence $(F_{n_k})_k$ of $(F_n)_n$ and a right continuous, non-decreasing function $F$, such that $F_{n_k} \to F$ at all continuity points of $F$. (We do not yet claim that $F$ is a probability distribution function, as we ignore $\lim_{x \to \infty} F(x)$ and $\lim_{x \to -\infty} F(x)$ for now.) \end{lemma} \begin{lemma} \label{s7e1} Let $\mu \in M_1(\R)$, $A > 0$ and $\phi$ the characteristic function of $\mu$. Then $\mu\left( (-A,A) \right) \ge \frac{A}{2} \left| \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) \dif t \right| - 1$. 
\end{lemma} \begin{refproof}{s7e1} We have \begin{IEEEeqnarray*}{rCl} \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) \dif t &=& \int_{-\frac{2}{A}}^{\frac{2}{A}} \int_{\R} e^{\i t x} \mu(\dif x) \dif t\\ &=& \int_{\R} \int_{-\frac{2}{A}}^{\frac{2}{A}} e^{\i t x} \dif t \mu(\dif x)\\ &=& \int_{\R} \int_{-\frac{2}{A}}^{\frac{2}{A}} \cos(t x) \dif t \mu(\dif x)\\ &=& \int_{\R} \frac{2 \sin\left( \frac{2x}{A}\right) }{x} \mu(\dif x). \end{IEEEeqnarray*} Hence \begin{IEEEeqnarray*}{rCl} \frac{A}{2}\left|\int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) \dif t\right| &=& \left| A \int_{\R} \frac{\sin\left( \frac{2x}{A} \right) }{x} \mu(\dif x)\right|\\ &=& 2\left| \int_{\R} \sinc\left( \frac{2x}{A} \right) \mu(\dif x)\right|\\ &\le& 2 \left[ \int_{|x| < A} \underbrace{\left|\sinc\left( \frac{2x}{A} \right) \right|}_{\le 1} \mu(\dif x) + \int_{|x| \ge A} \left|\sinc\left( \frac{2x}{A} \right)\right| \mu(\dif x) \right]\\ &\le& 2 \left[ \mu\left( (-A,A) \right) + \frac{A}{2} \int_{|x| \ge A} \frac{|\sin(2x / A)|}{|x|} \mu(\dif x) \right]\\ &\le& 2 \left[ \mu\left( (-A,A) \right) + \frac{A}{2} \int_{|x| \ge A} \frac{1}{A} \mu(\dif x) \right]\\ &=& 2 \mu((-A,A)) + \mu((-A,A)^c)\\ &=& 1 + \mu((-A,A)). \end{IEEEeqnarray*} \end{refproof} \begin{refproof}{levycontinuity} ``$\implies$'' If $\mu_n \implies \mu$, then by definition $\int f \dif \mu_n \to \int f \dif \mu$ for all $f \in C_b$. Since $x \mapsto e^{\i t x}$ is continuous and bounded, it follows that $\phi_n(t) \to \phi(t)$ for all $t \in \R$. ``$ \impliedby$'' % Step 1: \begin{claim} \label{levyproofc1} Given $\epsilon > 0$, there exists $A > 0$ such that $\liminf_n \mu_n\left( (-A,A) \right) \ge 1 - 2 \epsilon$. \end{claim} \begin{refproof}{levyproofc1} If $f$ is continuous, then \[ \frac{1}{2\eta} \int_{x - \eta}^{x + \eta} f(t) \dif t \xrightarrow{\eta \downarrow 0} f(x). 
\] Applying this to $\phi$ at $t = 0$, where $\phi(0) = 1$, one obtains for $A$ large enough: \begin{equation} \left| \frac{A}{4} \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) \dif t - 1 \right| < \frac{\epsilon}{2}. \label{levyproofc1eqn1} \end{equation} \begin{claim} For $n$ large enough, we have \begin{equation} \left| \frac{A}{4} \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) \dif t - 1\right| < \epsilon. \label{levyproofc1eqn2} \end{equation} \end{claim} \begin{subproof} Apply dominated convergence, using $|\phi_n| \le 1$ and $\phi_n \to \phi$ pointwise, together with \yaref{levyproofc1eqn1}. \end{subproof} So to prove $\mu_n\left( (-A,A) \right) \ge 1 - 2 \epsilon$, apply \yaref{s7e1}. It suffices to show that \[ \frac{A}{2} \left| \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) \dif t\right| - 1 \ge 1 - 2\epsilon \] or \[ 1 - \frac{A}{4} \left|\int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) \dif t \right| \le \epsilon, \] which follows from \yaref{levyproofc1eqn2}. \end{refproof} % Step 2 By \yaref{lec13_lem1}, there exists a right continuous, non-decreasing $F$ and a subsequence $(F_{n_k})_k$ of $(F_n)_n$, where $F_n$ is the probability distribution function of $\mu_n$, such that $F_{n_k}(x) \to F(x)$ for all $x$ where $F$ is continuous. \begin{claim} \[ \lim_{x \to -\infty} F(x) = 0 \] and \[ \lim_{x \to \infty} F(x) = 1, \] i.e.~$F$ is a probability distribution function.\footnote{This does not hold in general!} \end{claim} \begin{subproof} We have \[ \mu_{n_k}\left( (- \infty, x] \right) = F_{n_k}(x) \to F(x). \] Again, given $\epsilon > 0$, there exists $A > 0$, such that $\mu_{n_k}\left( (-A,A) \right) > 1 - 2 \epsilon$ (\yaref{levyproofc1}). Hence $F(x) \ge 1 - 2 \epsilon$ for $x > A$ and $F(x) \le 2\epsilon$ for $x < -A$. This proves the claim. \end{subproof} Since $F$ is a probability distribution function, there exists a probability measure $\nu$ on $\R$ such that $F$ is the distribution function of $\nu$. Since $F_{n_k}(x) \to F(x)$ at all continuity points $x$ of $F$, by \yaref{lec10_thm1} we obtain that $\mu_{n_k} \overset{k \to \infty}{\implies} \nu$. 
Hence $\phi_{\mu_{n_k}}(t) \to \phi_\nu(t)$, by the direction ``$\implies$'' already shown. But by assumption, $\phi_{\mu_{n_k}}(\cdot ) \to \phi_{\mu}(\cdot )$, so $\phi_{\mu}(\cdot) = \phi_{\nu}(\cdot )$. By the \yaref{charfuncuniqueness}, we get $\mu = \nu$. We have shown that $\mu_{n_k} \implies \mu$ along a subsequence. We still need to show that $\mu_n \implies \mu$. \begin{fact} Suppose $(a_n)_n$ is a bounded sequence in $\R$, such that any convergent subsequence converges to $a \in \R$. Then $a_n \to a$. \end{fact} % \begin{subproof} % \notes % \end{subproof} Assume that $\mu_n$ does not converge weakly to $\mu$. By \yaref{lec10_thm1}, pick a continuity point $x_0$ of $F$, such that $F_n(x_0) \not\to F(x_0)$. Pick $\delta > 0$ and a subsequence $F_{n_1}(x_0), F_{n_2}(x_0), \ldots$ which are all outside $(F(x_0) - \delta, F(x_0) + \delta)$. Then $\phi_{n_1}, \phi_{n_2}, \ldots \to \phi$. Now, there exists a further subsequence $G_1, G_2, \ldots$ of $(F_{n_i})_i$ which converges; by the argument above applied to this subsequence, its limit must be $F$. $G_1, G_2, \ldots$ is a subsequence of $F_1, F_2,\ldots$. However, $G_1, G_2, \ldots$ cannot converge to $F$, as this would fail at $x_0$. This is a contradiction. \end{refproof} \begin{refproof}{genlevycontinuity} % TODO TODO TODO \end{refproof} % IID is over now \subsection{Summary} What did we learn? \begin{itemize} \item How to construct product measures \item WLLN and SLLN \item Kolmogorov's three-series theorem \item Fourier transform, weak convergence and CLT \end{itemize}