s23-probability-theory/inputs/lecture_13.tex

\lecture{13}{2023-05}{}
%The difficult part is to show \yaref{levycontinuity}.
%This is the last lecture, where we will deal with independent random variables.

We have seen, that
if $X_1, X_2,\ldots$ are i.i.d.~with $ \mu = \bE[X_1]$,
    $\sigma^2 = \Var(X_1)$,
    then $\frac{\sum_{i=1}^{n} (X_i - \mu)}{\sigma \sqrt{n} } \xrightarrow{(d)} \cN(0,1)$.

\begin{question}
    What happens if $X_1, X_2,\ldots$ are independent, but not identically distributed? Do we still have a CLT?
\end{question}

\begin{theorem}[Lindeberg CLT]
    \yalabel{Lindeberg's CLT}{Lindeberg CLT}{lindebergclt}
    Assume $X_1, X_2, \ldots,$ are independent (but not necessarily identically distributed) with $\mu_i = \bE[X_i] < \infty$ and $\sigma_i^2 = \Var(X_i) < \infty$.
    Let $S_n = \sqrt{\sum_{i=1}^{n} \sigma_i^2}$
    and assume that
    \[\lim_{n \to \infty} \frac{1}{S_n^2} \sum_{i=1}^{n} \bE\left[
        (X_i - \mu_i)^2 \One_{|X_i - \mu_i| > \epsilon S_n}
      \right] = 0\]
    for all $\epsilon > 0$
    (\vocab{Lindeberg condition}\footnote{``The truncated variance is negligible compared to the variance.''}).

    Then the CLT holds, i.e.~
    \[
    \frac{\sum_{i=1}^n (X_i - \mu_i)}{S_n} \xrightarrow{(d)}  \cN(0,1).
    \]
\end{theorem}

\begin{theorem}[Lyapunov condition]
    \yalabel{Lyapunov's CLT}{Lyapunov CLT}{lyapunovclt}
    Let $X_1, X_2,\ldots$ be independent, $\mu_i = \bE[X_i] < \infty$,
    $\sigma_i^2 = \Var(X_i) < \infty$
    and $S_n \coloneqq  \sqrt{\sum_{i=1}^n \sigma_i^2}$.
    Then, assume that, for some $\delta > 0$,
    \[
    \lim_{n \to \infty} \frac{1}{S_n^{2+\delta}}
    \sum_{i=1}^{n} \bE[|X_i - \mu_i|^{2 + \delta}] = 0
    \]
    (\vocab{Lyapunov condition}).
    Then the CLT holds.
\end{theorem}
\begin{remark}
    The Lyapunov condition implies the Lindeberg condition.
    (Exercise).
\end{remark}
We will not prove \yaref{lindebergclt} or \yaref{lyapunovclt}
in this lecture. However, they are quite important.

We will now sketch the proof of \yaref{levycontinuity},
details can be found in the notes.\notes
\begin{definition}
    Let $(X_n)_n$ be a sequence of random variables.
    The distribution of $(X_n)_n$ is called
    \vocab[Distribution!tight]{tight}  (dt. ``straff''),
    if
    \[
        \lim_{a \to \infty} \sup_{n \in \N} \bP[|X_n| > a] = 0.
    \]
\end{definition}
\begin{example}+[Exercise 8.1]
    \todo{Copy}
\end{example}
A generalized version of \yaref{levycontinuity} is the following:
\begin{theorem}[A generalized version of \yaref{levycontinuity}]
    \label{genlevycontinuity}
    Suppose we have random variables $(X_n)_n$ such that
    $\bE[e^{\i t X_n}] \xrightarrow{n \to \infty}  \phi(t)$ for all $t \in \R$
    for some function $\phi$ on $\R$.
    Then the following are equivalent:
    \begin{enumerate}[(a)]
        \item The distribution of $X_n$ is tight.
        \item $X_n \xrightarrow{(d)} X$ for some real-valued random variable $X$.
        \item $\phi$ is the characteristic function of some real-valued random variable.
        \item $\phi$ is continuous on all of $\R$.
        \item $\phi$ is continuous at $0$.
    \end{enumerate}
\end{theorem}
\todo{Proof of \yaref{genlevycontinuity} (Exercise 8.2)}
\begin{example}
    Let $Z \sim \cN(0,1)$ and $X_n \coloneqq  n Z$.
    We have $\phi_{X_n}(t) = \bE[e^{\i t X_n}] = e^{-\frac{1}{2} t^2 n^2} \xrightarrow{n \to \infty} \One_{\{t = 0\} }$.
    $\One_{\{t = 0\}}$ is not continuous at $0$.
    By \yaref{genlevycontinuity}, $X_n$ cannot converge in distribution to a real-valued
    random variable.

    Exercise: $X_n \xrightarrow{(d)} \overline{X}$,
    where $\bP[\overline{X} = \infty] = \frac{1}{2} = \bP[\overline{X} = -\infty]$.

    Similar examples are $\mu_n \coloneqq  \delta_n$ and
    $\mu_n \coloneqq  \frac{1}{2} \delta_n + \frac{1}{2} \delta_{-n}$.
\end{example}

\begin{example}
    Suppose that $X_1, X_2,\ldots$ are i.i.d.~with $\bE[X_1] = 0$.
    Let $\sigma^2 \coloneqq  \Var(X_1) \in (0,\infty)$ and $S_n \coloneqq X_1 + \ldots + X_n$.
    Then the distribution of $\frac{S_n}{\sqrt{n}}$ (and hence also of $\frac{S_n}{\sigma \sqrt{n}}$) is tight:

    \begin{IEEEeqnarray*}{rCl}
    \bE\left[ \left( \frac{S_n}{\sqrt{n}} \right)^2 \right] &=&
        \frac{1}{n} \bE[ (X_1+ \ldots + X_n)^2]\\
                                                          &=& \sigma^2
    \end{IEEEeqnarray*}
    For $a > 0$, by \yaref{thm:chebyshev},
    we have
    \[
    \bP\left[ \left| \frac{S_n}{\sqrt{n}} \right| > a \right] \leq \frac{\sigma^2}{a^2} \xrightarrow{a \to \infty} 0.
    \]
    verifying \yaref{genlevycontinuity}.
\end{example}

\begin{example}
    Suppose $C$ is a random variable which is \vocab[Cauchy distribution]{Cauchy distributed}, i.e.~$C$
    has probability density function $f_C(x) = \frac{1}{\pi} \frac{1}{1 + x^2}$.

    \begin{figure}[H]
        \centering
        \begin{tikzpicture}
            \begin{axis}[samples=100, smooth]
                \addplot[] { (1/3.14159265358979323846) * (1 / ( 1 + x * x))};
            \end{axis}
        \end{tikzpicture}
        \caption{Probability density function of $C$}
    \end{figure}

    We know that $\bE[|C|] = \infty$.

    We have $\phi_C(t) = \bE[e^{\i t C}] = e^{-|t|}$.
    Suppose $C_1, C_2, \ldots, C_n$ are i.i.d.~Cauchy distributed
    and let $S_n \coloneqq  C_1 + \ldots + C_n$.

    Exercise: $\phi_{\frac{S_n}{n}}(t) = e^{-|t|} = \phi_{C_1}(t)$, thus $\frac{S_n}{n} \sim  C$.
\end{example}

We will prove \yaref{levycontinuity} assuming
\yaref{lec10_thm1}.
\yaref{lec10_thm1} will be shown in the notes.\notes
We will need the following:
\begin{lemma}
    \label{lec13_lem1}
    Given a sequence $(F_n)_n$ of probability distribution functions,
    there is a subsequence $(F_{n_k})_k$ of $(F_n)_n$
    and a right continuous, non-decreasing function $F$,
    such that $F_{n_k} \to  F$ at all continuity points of $F$.
    (We do not yet claim, that $F$ is a probability distribution function,
    as we ignore $\lim_{x \to \infty} F(x)$ and $\lim_{x \to -\infty} F(x)$ for now).
\end{lemma}
\begin{lemma}
    \label{s7e1}
    Let $\mu \in M_1(\R)$, $A > 0$ and $\phi$ the characteristic function of $\mu$.
    Then $\mu\left( (-A,A) \right) \ge  \frac{A}{2} \left| \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) \dif t \right| - 1$.
\end{lemma}
\begin{refproof}{s7e1}
    We have
    \begin{IEEEeqnarray*}{rCl}
        \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) \dif t
        &=& \int_{-\frac{2}{A}}^{\frac{2}{A}} \int_{\R} e^{\i t x} \mu(\dif x) \dif t\\
        &=& \int_{\R} \int_{-\frac{2}{A}}^{\frac{2}{A}} e^{\i t x} \dif t \mu(\dif x)\\
        &=& \int_{\R} \int_{-\frac{2}{A}}^{\frac{2}{A}} \cos(t x) \dif t \mu(\dif x)\\
        &=& \int_{\R} \frac{2 \sin\left( \frac{2x}{A}\right) }{x} \mu(\dif x).
    \end{IEEEeqnarray*}
    Hence
    \begin{IEEEeqnarray*}{rCl}
        \frac{A}{2}\left|\int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) \dif t\right|
        &=& \left| A \int_{\R} \frac{\sin\left( \frac{2x}{A} \right) }{x} \mu(\dif x)\right|\\
        &=& 2\left| \int_{\R} \sinc\left( \frac{2x}{A} \right) \mu(\dif x)\right|\\
        &\le& 2 \left[ \int_{|x| < A} \underbrace{\left|\sinc\left( \frac{2x}{A} \right) \right|}_{\le 1} \mu(\dif x)
            + \int_{|x| \ge A} \left|\sinc\left( \frac{2x}{A} \right)\right| \mu(\dif x) \right]\\
        &\le& 2 \left[ \mu\left( (-A,A) \right)
            + \frac{A}{2} \int_{|x| \ge A} \frac{|\sin(2x / A)|}{|x|} \mu(\dif x) \right]\\
        &\le& 2 \left[ \mu\left( (-A,A) \right)
            + \frac{A}{2} \int_{|x| \ge A} \frac{1}{A} \mu(\dif x) \right]\\
        &\le& 2 \mu((-A,A)) + \mu((-A,A)^c)\\
        &=& 1 + \mu((-A,A)).
    \end{IEEEeqnarray*}
\end{refproof}


\begin{refproof}{levycontinuity}
``$\implies$ '' If $\mu_n \implies \mu$,
then by definition
$\int f \dif \mu_n \to  \int f \dif \mu$
for all $f \in C_b$.
Since $x \mapsto  e^{\i t x}$ is continuous and bounded,
it follows that $\phi_n(t) \to \phi(t)$
for all $t \in \R$.


``$ \impliedby$''

% Step 1:
\begin{claim}
    \label{levyproofc1}
    Given $\epsilon > 0$ there exists $A > 0$ such that
    $\liminf_n \mu_n\left( (-A,A) \right) \ge  1 - 2 \epsilon$.
\end{claim}
\begin{refproof}{levyproofc1}
    If $f$ is continuous, then
    \[
    \frac{1}{2\eta} \int_{x - \eta}^{x + \eta} f(t) \dif t \xrightarrow{\eta \downarrow 0}  f(x).
    \]
    Applying this to $\phi$ at $t = 0$ with $\eta = \frac{2}{A}$ and using $\phi(0) = 1$,
    one obtains for $A$ large enough:
    \begin{equation}
    \left| \frac{A}{4} \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) dt - 1 \right| < \frac{\epsilon}{2}
    \label{levyproofc1eqn1}
    \end{equation}

    \begin{claim}
        For $n$ large enough, we have
        \begin{equation}
        \left| \frac{A}{4} \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) d t - 1\right| < \epsilon.
        \label{levyproofc1eqn2}
        \end{equation}
    \end{claim}
    \begin{subproof}
        Apply dominated convergence.
    \end{subproof}
    So to prove $\mu_n\left( (-A,A) \right) \ge 1 - 2 \epsilon$,
    apply \yaref{s7e1}.
    It suffices to show that
    \[
        \frac{A}{2} \left| \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) dt\right| - 1 \ge  1 - 2\epsilon
    \]
    or
    \[
    1 - \frac{A}{4} \left|\int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) dt \right| \le \epsilon,
    \]
    which follows from \yaref{levyproofc1eqn2}.
\end{refproof}

% Step 2
By \yaref{lec13_lem1}
there exists a right continuous, non-decreasing $F $
and a subsequence $(F_{n_k})_k$ of $(F_n)_n$ where $F_n$ is
the probability distribution function of $\mu_n$,
such that $F_{n_k}(x) \to F(x)$ for all $x$ where $F$ is continuous.
\begin{claim}
    \[
    \lim_{x \to -\infty} F(x) = 0
    \]
    and
    \[
    \lim_{x \to  \infty} F(x) = 1,
    \]
    i.e.~$F$ is a probability distribution function.\footnote{This does not hold in general!}
\end{claim}
\begin{subproof}
    We have
    \[
    \mu_{n_k}\left( (- \infty, x] \right) = F_{n_k}(x) \to F(x).
    \]
    Again, given $\epsilon > 0$, there exists $A > 0$, such that
    $\mu_{n_k}\left( (-A,A) \right) > 1 - 2 \epsilon$ (\yaref{levyproofc1}).

    Hence $F(x) \ge  1 - 2 \epsilon$ for $x > A $
    and $F(x) \le  2\epsilon$ for $x < -A$.
    This proves the claim.
\end{subproof}

Since $F$ is a probability distribution function, there exists
a probability measure $\nu$ on $\R$ such that $F$ is the distribution
function of $\nu$.
Since $F_{n_k}(x) \to F(x)$ at all continuity points $x$ of $F$,
by \yaref{lec10_thm1} we obtain that
$\mu_{n_k} \overset{k \to \infty}{\implies} \nu$.
Hence
$\phi_{\mu_{n_k}}(t) \to \phi_\nu(t)$ for all $t \in \R$, by the direction ``$\implies$'' shown above.
But by assumption,
$\phi_{\mu_{n_k}}(\cdot ) \to  \phi_{\mu}(\cdot )$ so $\phi_{\mu}(\cdot) = \phi_{\nu}(\cdot )$.
By the \yaref{charfuncuniqueness}, we get $\mu = \nu$.

We have shown, that $\mu_{n_k} \implies \mu$ along a subsequence.
We still need to show that $\mu_n \implies \mu$.
\begin{fact}
    Suppose $a_n$ is a bounded sequence in $\R$,
    such that any convergent subsequence converges to $a \in \R$.
    Then $a_n \to  a$.
\end{fact}
% \begin{subproof}
%     \notes
% \end{subproof}
Assume that $\mu_n$ does not converge to $\mu$.
By \yaref{lec10_thm1}, pick a continuity point $x_0$ of $F$,
such that $F_n(x_0) \not\to F(x_0)$.
Pick $\delta > 0$ and a subsequence $F_{n_1}(x_0), F_{n_2}(x_0), \ldots$
which are all outside $(F(x_0) - \delta, F(x_0) + \delta)$.
Then $\phi_{n_1}, \phi_{n_2}, \ldots \to  \phi$.
Now, there exists a further subsequence $G_1, G_2, \ldots$ of $F_{n_i}$,
which converges.
$G_1, G_2, \ldots$ is a subsequence of $F_1, F_2,\ldots$.
However $G_1, G_2, \ldots$ is not converging to $F$,
as this would fail at $x_0$. This is a contradiction.
\end{refproof}
\begin{refproof}{genlevycontinuity}
    % TODO TODO TODO
\end{refproof}

% IID is over now
\subsection{Summary}
What did we learn:

\begin{itemize}
    \item How to construct product measures
    \item WLLN and SLLN
    \item Kolmogorov's three series theorem
    \item Fourier transform, weak convergence and CLT
\end{itemize}