282 lines
10 KiB
TeX
282 lines
10 KiB
TeX
\lecture{13}{2023-05}{}
|
|
%The difficult part is to show \autoref{levycontinuity}.
|
|
%This is the last lecture, where we will deal with independent random variables.
|
|
|
|
We have seen that
|
|
if $X_1, X_2,\ldots$ are i.i.d.~with $ \mu = \bE[X_1]$,
|
|
$\sigma^2 = \Var(X_1)$,
|
|
then $\frac{\sum_{i=1}^{n} (X_i - \mu)}{\sigma \sqrt{n} } \xrightarrow{(d)} \cN(0,1)$.
|
|
|
|
\begin{question}
|
|
What happens if $X_1, X_2,\ldots$ are independent, but not identically distributed? Do we still have a CLT?
|
|
\end{question}
|
|
|
|
\begin{theorem}[Lindeberg CLT]
|
|
\label{lindebergclt}
|
|
Assume $X_1, X_2, \ldots$ are independent (but not necessarily identically distributed) with $\mu_i = \bE[X_i] < \infty$ and $\sigma_i^2 = \Var(X_i) < \infty$.
|
|
Let $S_n = \sqrt{\sum_{i=1}^{n} \sigma_i^2}$
|
|
and assume that
|
|
\[\lim_{n \to \infty} \frac{1}{S_n^2} \sum_{i=1}^{n} \bE\left[(X_i - \mu_i)^2 \One_{|X_i - \mu_i| > \epsilon S_n}\right] = 0\]
|
|
for all $\epsilon > 0$
|
|
(\vocab{Lindeberg condition}\footnote{``The truncated variance is negligible compared to the variance.''}).
|
|
|
|
Then the CLT holds, i.e.~
|
|
\[
|
|
\frac{\sum_{i=1}^n (X_i - \mu_i)}{S_n} \xrightarrow{(d)} \cN(0,1).
|
|
\]
|
|
\end{theorem}
|
|
|
|
\begin{theorem}[Lyapunov CLT]
|
|
\label{lyapunovclt}
|
|
Let $X_1, X_2,\ldots$ be independent, $\mu_i = \bE[X_i] < \infty$,
|
|
$\sigma_i^2 = \Var(X_i) < \infty$
|
|
and $S_n \coloneqq \sqrt{\sum_{i=1}^n \sigma_i^2}$.
|
|
Then, assume that, for some $\delta > 0$,
|
|
\[
|
|
\lim_{n \to \infty} \frac{1}{S_n^{2 + \delta}} \sum_{i=1}^{n} \bE\left[|X_i - \mu_i|^{2 + \delta}\right] = 0
|
|
\]
|
|
(\vocab{Lyapunov condition}).
|
|
Then the CLT holds.
|
|
\end{theorem}
|
|
\begin{remark}
|
|
The Lyapunov condition implies the Lindeberg condition.
|
|
(Exercise).
|
|
\end{remark}
|
|
We will not prove \autoref{lindebergclt} or \autoref{lyapunovclt}
|
|
in this lecture. However, they are quite important.
|
|
|
|
We will now sketch the proof of \autoref{levycontinuity},
|
|
details can be found in the notes.\notes
|
|
\begin{definition}
|
|
Let $(X_n)_n$ be a sequence of random variables.
|
|
The distribution of $(X_n)_n$ is called
|
|
\vocab[Distribution!tight]{tight} (dt. ``straff''),
|
|
if
|
|
\[
|
|
\lim_{a \to \infty} \sup_{n \in \N} \bP[|X_n| > a] = 0.
|
|
\]
|
|
\end{definition}
|
|
\begin{example}+[Exercise 8.1]
|
|
\todo{Copy}
|
|
\end{example}
|
|
A generalized version of \autoref{levycontinuity} is the following:
|
|
\begin{theorem}[A generalized version of Levy's continuity \autoref{levycontinuity}]
|
|
\label{genlevycontinuity}
|
|
Suppose we have random variables $(X_n)_n$ such that
|
|
$\bE[e^{\i t X_n}] \xrightarrow{n \to \infty} \phi(t)$ for all $t \in \R$
|
|
for some function $\phi$ on $\R$.
|
|
Then the following are equivalent:
|
|
\begin{enumerate}[(a)]
|
|
\item The distribution of $X_n$ is tight.
|
|
\item $X_n \xrightarrow{(d)} X$ for some real-valued random variable $X$.
|
|
\item $\phi$ is the characteristic function of some real-valued random variable $X$.
|
|
\item $\phi$ is continuous on all of $\R$.
|
|
\item $\phi$ is continuous at $0$.
|
|
\end{enumerate}
|
|
\end{theorem}
|
|
\todo{Proof of \autoref{genlevycontinuity} (Exercise 8.2)}
|
|
\begin{example}
|
|
Let $Z \sim \cN(0,1)$ and $X_n \coloneqq n Z$.
|
|
We have $\phi_{X_n}(t) = \bE[e^{\i t X_n}] = e^{-\frac{1}{2} t^2 n^2} \xrightarrow{n \to \infty} \One_{\{t = 0\} }$.
|
|
$\One_{\{t = 0\}}$ is not continuous at $0$.
|
|
By \autoref{genlevycontinuity}, $X_n$ can not converge to a real-valued
|
|
random variable.
|
|
|
|
Exercise: $X_n \xrightarrow{(d)} \overline{X}$,
|
|
where $\bP[\overline{X} = \infty] = \frac{1}{2} = \bP[\overline{X} = -\infty]$.
|
|
|
|
Similar examples are $\mu_n \coloneqq \delta_n$ and
|
|
$\mu_n \coloneqq \frac{1}{2} \delta_n + \frac{1}{2} \delta_{-n}$.
|
|
\end{example}
|
|
|
|
\begin{example}
|
|
Suppose that $X_1, X_2,\ldots$ are i.i.d.~with $\bE[X_1] = 0$.
|
|
Let $\sigma^2 \coloneqq \Var(X_i) < \infty$ and $S_n \coloneqq X_1 + \ldots + X_n$.
|
|
Then the distribution of $\frac{S_n}{\sigma \sqrt{n}}$ is tight:
|
|
|
|
\begin{IEEEeqnarray*}{rCl}
|
|
\bE\left[ \left( \frac{S_n}{\sqrt{n} } \right)^2 \right] &=&
|
|
\frac{1}{n} \bE[ (X_1+ \ldots + X_n)^2]\\
|
|
&=& \sigma^2
|
|
\end{IEEEeqnarray*}
|
|
For $a > 0$, by Chebyshev's inequality, % TODO
|
|
we have
|
|
\[
|
|
\bP\left[ \left| \frac{S_n}{\sqrt{n}} \right| > a \right] \leq \frac{\sigma^2}{a^2} \xrightarrow{a \to \infty} 0.
|
|
\]
|
|
verifying \autoref{genlevycontinuity}.
|
|
\end{example}
|
|
|
|
\begin{example}
|
|
Suppose $C$ is a random variable which is \vocab[Cauchy distribution]{Cauchy distributed}, i.e.~$C$
|
|
has probability density function $f_C(x) = \frac{1}{\pi} \frac{1}{1 + x^2}$.
|
|
|
|
\begin{figure}[H]
|
|
\centering
|
|
\begin{tikzpicture}
|
|
\begin{axis}[samples=100, smooth]
|
|
\addplot[] { (1/3.14159265358979323846) * (1 / ( 1 + x * x))};
|
|
\end{axis}
|
|
\end{tikzpicture}
|
|
\caption{Probability density function of $C$}
|
|
\end{figure}
|
|
|
|
We know that $\bE[|C|] = \infty$.
|
|
|
|
We have $\phi_C(t) = \bE[e^{\i t C}] = e^{-|t|}$.
|
|
Suppose $C_1, C_2, \ldots, C_n$ are i.i.d.~Cauchy distributed
|
|
and let $S_n \coloneqq C_1 + \ldots + C_n$.
|
|
|
|
Exercise: $\phi_{\frac{S_n}{n}}(t) = e^{-|t|} = \phi_{C_1}(t)$, thus $\frac{S_n}{n} \sim C$.
|
|
\end{example}
|
|
|
|
We will prove \autoref{levycontinuity} assuming
|
|
\autoref{lec10_thm1}.
|
|
\autoref{lec10_thm1} will be shown in the notes.\notes
|
|
We will need the following:
|
|
\begin{lemma}
|
|
\label{lec13_lem1}
|
|
Given a sequence $(F_n)_n$ of probability distribution functions,
|
|
there is a subsequence $(F_{n_k})_k$ of $(F_n)_n$
|
|
and a right continuous, non-decreasing function $F$,
|
|
such that $F_{n_k} \to F$ at all continuity points of $F$.
|
|
(We do not yet claim, that $F$ is a probability distribution function,
|
|
as we ignore $\lim_{x \to \infty} F(x)$ and $\lim_{x \to -\infty} F(x)$ for now).
|
|
\end{lemma}
|
|
\begin{lemma}
|
|
\label{s7e1}
|
|
Let $\mu \in M_1(\R)$, $A > 0$ and $\phi$ the characteristic function of $\mu$.
|
|
Then $\mu\left( (-A,A) \right) \ge \frac{A}{2} \left| \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) d t \right| - 1$.
|
|
\end{lemma}
|
|
\begin{refproof}{s7e1}
|
|
Exercise.\todo{TODO}
|
|
\end{refproof}
|
|
|
|
|
|
|
|
\begin{refproof}{levycontinuity}
|
|
``$\implies$ '' If $\mu_n \implies \mu$, then
|
|
$\int f d \mu_n \to \int f d \mu$
|
|
for all $f \in C_b$ and $x \mapsto e^{\i t x}$ is continuous and bounded.
|
|
|
|
|
|
``$ \impliedby$''
|
|
|
|
% Step 1:
|
|
\begin{claim}
|
|
\label{levyproofc1}
|
|
Given $\epsilon > 0$ there exists $A > 0$ such that
|
|
$\liminf_n \mu_n\left( (-A,A) \right) \ge 1 - 2 \epsilon$.
|
|
\end{claim}
|
|
\begin{refproof}{levyproofc1}
|
|
If $f$ is continuous, then
|
|
\[
|
|
\frac{1}{2\eta} \int_{x - \eta}^{x + \eta} f(t) d t \xrightarrow{\eta \downarrow 0} f(x).
|
|
\]
|
|
Applying this to $\phi$ at $t = 0$ with $\eta = \frac{2}{A}$ and using $\phi(0) = 1$, one obtains for $A$ large enough:
|
|
\begin{equation}
|
|
\left| \frac{A}{4} \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi(t) dt - 1 \right| < \frac{\epsilon}{2}
|
|
\label{levyproofc1eqn1}
|
|
\end{equation}
|
|
|
|
\begin{claim}
|
|
For $n$ large enough, we have
|
|
\begin{equation}
|
|
\left| \frac{A}{4} \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) d t - 1\right| < \epsilon.
|
|
\label{levyproofc1eqn2}
|
|
\end{equation}
|
|
\end{claim}
|
|
\begin{subproof}
|
|
Apply dominated convergence.
|
|
\end{subproof}
|
|
So to prove $\mu_n\left( (-A,A) \right) \ge 1 - 2 \epsilon$,
|
|
apply \autoref{s7e1}.
|
|
It suffices to show that
|
|
\[
|
|
\frac{A}{2} \left| \int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) dt\right| - 1 \ge 1 - 2\epsilon
|
|
\]
|
|
or
|
|
\[
|
|
1 - \frac{A}{4} \left|\int_{-\frac{2}{A}}^{\frac{2}{A}} \phi_n(t) dt \right| \le \epsilon,
|
|
\]
|
|
which follows from \autoref{levyproofc1eqn2}.
|
|
\end{refproof}
|
|
|
|
% Step 2
|
|
By \autoref{lec13_lem1}
|
|
there exists a right continuous, non-decreasing $F $
|
|
and a subsequence $(F_{n_k})_k$ of $(F_n)_n$ where $F_n$ is
|
|
the probability distribution function of $\mu_n$,
|
|
such that $F_{n_k}(x) \to F(x)$ for all $x$ where $F$ is continuous.
|
|
\begin{claim}
|
|
\[
|
|
\lim_{x \to -\infty} F(x) = 0
|
|
\]
|
|
and
|
|
\[
|
|
\lim_{x \to \infty} F(x) = 1,
|
|
\]
|
|
i.e.~$F$ is a probability distribution function.\footnote{This does not hold in general!}
|
|
\end{claim}
|
|
\begin{subproof}
|
|
We have
|
|
\[
|
|
\mu_{n_k}\left( (- \infty, x] \right) = F_{n_k}(x) \to F(x).
|
|
\]
|
|
Again, given $\epsilon > 0$, there exists $A > 0$, such that
|
|
$\mu_{n_k}\left( (-A,A) \right) > 1 - 2 \epsilon$ (\autoref{levyproofc1}).
|
|
|
|
Hence $F(x) \ge 1 - 2 \epsilon$ for $x > A $
|
|
and $F(x) \le 2\epsilon$ for $x < -A$.
|
|
This proves the claim.
|
|
\end{subproof}
|
|
|
|
Since $F$ is a probability distribution function, there exists
|
|
a probability measure $\nu$ on $\R$ such that $F$ is the distribution
|
|
function of $\nu$.
|
|
We have $F_{n_k}(x) \to F(x)$ at all continuity points $x$ of $F$.
|
|
By \autoref{lec10_thm1} we obtain that
|
|
$\mu_{n_k} \overset{k \to \infty}{\implies} \nu$.
|
|
Hence
|
|
$\phi_{\mu_{n_k}}(t) \to \phi_\nu(t)$, by the other direction of that theorem.
|
|
But by assumption,
|
|
$\phi_{\mu_{n_k}}(\cdot ) \to \phi_{\mu}(\cdot )$ so $\phi_{\mu}(\cdot) = \phi_{\nu}(\cdot )$.
|
|
By \autoref{charfuncuniqueness}, we get $\mu = \nu$.
|
|
|
|
We have shown, that $\mu_{n_k} \implies \mu$ along a subsequence.
|
|
We still need to show that $\mu_n \implies \mu$.
|
|
\begin{fact}
|
|
Suppose $a_n$ is a bounded sequence in $\R$,
|
|
such that any convergent subsequence converges to $a \in \R$.
|
|
Then $a_n \to a$.
|
|
\end{fact}
|
|
\begin{subproof}
|
|
\notes
|
|
\end{subproof}
|
|
Assume that $\mu_n$ does not converge to $\mu$.
|
|
By \autoref{lec10_thm1}, pick a continuity point $x_0$ of $F$,
|
|
such that $F_n(x_0) \not\to F(x_0)$.
|
|
Pick $\delta > 0$ and a subsequence $F_{n_1}(x_0), F_{n_2}(x_0), \ldots$
|
|
which are all outside $(F(x_0) - \delta, F(x_0) + \delta)$.
|
|
Then $\phi_{n_1}, \phi_{n_2}, \ldots \to \phi$.
|
|
Now, there exists a further subsequence $G_1, G_2, \ldots$ of $F_{n_i}$,
|
|
which converges.
|
|
$G_1, G_2, \ldots$ is a subsequence of $F_1, F_2,\ldots$.
|
|
However $G_1, G_2, \ldots$ is not converging to $F$,
|
|
as this would fail at $x_0$. This is a contradiction.
|
|
\end{refproof}
|
|
|
|
|
|
|
|
% IID is over now
|
|
\subsection{Summary}
|
|
What did we learn:
|
|
|
|
\begin{itemize}
|
|
\item How to construct product measures
|
|
\item WLLN and SLLN
|
|
\item Kolmogorov's three series theorem
|
|
\item Fourier transform, weak convergence and CLT
|
|
\end{itemize}
|
|
|
|
|