s23-probability-theory/inputs/lecture_10.tex

\lecture{10}{2023-05-09}{}

First, we will prove some of the most important facts about Fourier transforms.

We consider $(\R, \cB(\R))$.
\begin{notation}
   By $M_1 (\R)$ we denote the set of all probability measures on $\left( \R, \cB(\R) \right)$.
\end{notation}

For all $\bP \in M_1(\R)$ we define $\phi_{\bP}(t) = \int_{\R} e^{\i t x}\bP(\dif x)$.
If $X: (\Omega, \cF) \to (\R, \cB(\R))$ is a random variable, we write
$\phi_X(t) \coloneqq  \bE[e^{\i t X}] = \phi_{\mu}(t)$,
where $\mu = \bP X^{-1}$.


\begin{refproof}{inversionformula}
    We will prove that the limit in the RHS of \yaref{invf}
    exists and is equal to the LHS.
    Note that the term on the RHS is integrable, as
    \[
    \lim_{t \to 0} \frac{e^{-\i t b} - e^{-\i t a}}{- \i t} \phi(t) = b - a
    \]
    and note that $\phi(0) = 1$ and $|\phi(t)| \le 1$.
    % TODO think about this

    We have
    \begin{IEEEeqnarray*}{rCl}
        &&\lim_{T \to  \infty} \frac{1}{2 \pi} \int_{-T}^T \int_{\R} \frac{e^{-\i t b}- e^{-\i t a}}{-\i t} e^{\i t x} \bP(\dif x) \dif t\\
        &\overset{\yaref{thm:fubini}}{=}& \lim_{T \to  \infty} \frac{1}{2 \pi} \int_{\R} \int_{-T}^T \frac{e^{-\i t b}- e^{-\i t a}}{-\i t} e^{\i t x} \dif t \bP(\dif x)\\
        &=& \lim_{T \to  \infty} \frac{1}{2 \pi} \int_{\R} \int_{-T}^T \frac{e^{\i t (x-b)}- e^{\i t (x-a)}}{-\i t} \dif t \bP(\dif x)\\
        &=& \lim_{T \to \infty} \frac{1}{2 \pi} \int_{\R} \underbrace{\int_{-T}^T \left[ \frac{\cos(t (x-b)) - \cos(t(x-a))}{-\i t}\right] \dif t}_{=0 \text{, as the function is odd}} \bP(\dif x) \\
        &&  + \lim_{T \to \infty} \frac{1}{2\pi} \int_{\R}\int_{-T}^T \frac{\sin(t ( x - b)) - \sin(t(x-a))}{-t} \dif t \bP(\dif x)\\
        &=& \lim_{T \to \infty} \frac{1}{\pi} \int_\R \int_{0}^T \frac{\sin(t(x-a)) - \sin(t(x-b))}{t} \dif t \bP(\dif x)\\
        &\overset{\substack{\yaref{fact:sincint},\text{DCT}}}{=}&
          \frac{1}{\pi} \int -\frac{\pi}{2} \One_{x < a} + \frac{\pi}{2} \One_{x > a}
          - (- \frac{\pi}{2} \One_{x < b} + \frac{\pi}{2} \One_{x > b}) \bP(\dif x)\\
        &=& \frac{1}{2} \bP(\{a\} ) + \frac{1}{2} \bP(\{b\}) + \bP((a,b))\\
        &=& \frac{F(b) + F(b-)}{2} - \frac{F(a) + F(a-)}{2}
    \end{IEEEeqnarray*}
\end{refproof}

\begin{fact}
    \label{fact:sincint}
    \[
    \int_0^\infty \frac{\sin x}{x} \dif x = \frac{\pi}{2}
    \]
    where the LHS is an improper Riemann-integral.
    Note that the LHS is not Lebesgue-integrable.
    It follows that
    \begin{IEEEeqnarray*}{rCl}
        \lim_{T \to  \infty} \int_0^T \frac{\sin(t(x-a))}{t} \dif t &=&
    \begin{cases}
        - \frac{\pi}{2} &\text{if }x < a,\\
        0 &\text{if }x = a,\\
        \frac{\pi}{2}&\text{if } x > a.
    \end{cases}
    \end{IEEEeqnarray*}
\end{fact}

\begin{theorem} % Theorem 3
    \label{thm:lec10_3}
    Let $\bP \in M_1(\R)$ such that $\phi_\bP \in L^1(\lambda)$.
    Then $\bP$ has a continuous probability density given by
    \[
    f(x) = \frac{1}{2 \pi} \int_{\R} e^{-\i t x} \phi_{\bP}(t) \dif t.
    \]
\end{theorem}

\begin{example}
    \begin{itemize}
        \item Let $\bP = \delta_{0}$.
          Then
          \[
          \phi_{\bP}(t) = \int e^{\i t x} \delta_0(\dif x) = e^{\i t 0 } = 1
          \]
        \item Let $\bP = \frac{1}{2} \delta_1 + \frac{1}{2} \delta_{-1}$.
            Then
            \[
            \phi_{\bP}(t) = \frac{1}{2} e^{\i t} + \frac{1}{2} e^{- \i t} = \cos(t)
            \]
    \end{itemize}
\end{example}
\begin{refproof}{thm:lec10_3}
    Let $f(x) \coloneqq  \frac{1}{2 \pi} \int_{\R} e^{ - \i t x} \phi(t) \dif t$.
    \begin{claim}
        If $x_n \to  x$, then $f(x_n) \to  f(x)$.
    \end{claim}
    \begin{subproof}
        Since $x_n \to x$, we have
        $e^{-\i t x_n} \phi(t) \xrightarrow{n \to \infty} e^{-\i t x} \phi(t)$
        for all $t$.
        Since
        \[
        |e^{-\i t x_n} \phi(t)| \le  |\phi(t)|
        \]
        for all $n$ and $\phi \in L^1$,
        we get $f(x_n) \to  f(x)$
        by the dominated convergence theorem.
    \end{subproof}

    We'll show that for all  $a < b$ we have
    \[
    \bP\left( (a,b] \right)  = \int_a^b f(x) \dif x.\label{thm10_3eq1}
    \]
    Let $F$ be the distribution function of $\bP$.
    It is enough to prove \yaref{thm10_3eq1}
    for all continuity points $a $ and $ b$ of $F$.
    We have
    \begin{IEEEeqnarray*}{rCl}
        \rhs &\overset{\text{Fubini}}{=}& \frac{1}{2 \pi} \int_{\R} \int_{a}^b e^{-\i t x} \phi(t) \dif x \dif t\\
            &=& \frac{1}{2 \pi} \int_\R \phi(t) \int_a^b e^{-\i t x} \dif x \dif t\\
            &=& \frac{1}{2\pi} \int_{\R} \phi(t) \left( \frac{e^{-\i t b} - e^{-\i t a}}{- \i t} \right) \dif t\\
            &\overset{\text{dominated convergence}}{=}& \lim_{T \to \infty} \frac{1}{2\pi} \int_{-T}^{T} \phi(t) \left( \frac{e^{-\i t b} - e^{- \i t a}}{- \i t} \right) \dif t
    \end{IEEEeqnarray*}
    By the \yaref{inversionformula},
    the RHS is equal to $F(b) - F(a) = \bP\left( (a,b] \right)$.
\end{refproof}

However, Fourier analysis is not only useful for continuous probability density functions:

\begin{theorem}[Bochner's formula for the mass at a point]
    \yalabel{Bochner's Formula for the Mass at a Point}{Bochner}{bochnersformula} % Theorem 4
    Let $\bP \in M_1(\R)$ and $\phi \coloneqq \phi_{\bP}$.
    Then
    \[
    \forall x \in  \R .~ \bP\left( \{x\}  \right) = \lim_{T \to \infty} \frac{1}{2 T} \int_{-T}^T e^{-\i t x } \phi(t) \dif t.
    \]

\end{theorem}
\begin{refproof}{bochnersformula}
    We have
    \begin{IEEEeqnarray*}{rCl}
        \rhs &=& \lim_{T \to  \infty} \frac{1}{2 T} \int_{-T}^T e^{-\i t x} \int_{\R} e^{\i t y} \bP(\dif y) \dif t \\
            &\overset{\text{Fubini}}{=}&
              \lim_{T \to  \infty} \frac{1}{2 T} \int_\R \int_{-T}^T
                 e^{\i t (y - x)} \dif t \bP(\dif y)\\
            &=& \lim_{T \to  \infty} \frac{1}{2 T} \int_\R \int_{-T}^T
                  \cos(t(y-x)) + \underbrace{\i \sin(t (y-x))}_{\text{odd}}
                  \dif t \bP(\dif y)\\
            &=& \lim_{T \to \infty} \frac{1}{2T}\int_{\R}
              \int_{-T}^T \cos(t(y - x)) \dif t \bP(\dif y)\\
            &=& \lim_{T \to \infty} \frac{1}{2T}\int_{\R}
                  2T \sinc(T(y-x))
                  \footnote{$\sinc(x) = \begin{cases}
                          \frac{\sin(x)}{x} &\text{if } x \neq 0,\\
                          1 &\text{otherwise.}
                  \end{cases}$} \bP(\dif y)\\
            &\overset{\text{DCT}}{=}& \int_{\R}\lim_{T \to \infty}
                  \sinc(T(y-x)) \bP(\dif y)\\
            &=& \bP(\{x\}).
    \end{IEEEeqnarray*}
\end{refproof}

\begin{theorem} % Theorem 5
    \label{thm:lec_10thm5}
    Let $\phi$ be the characteristic function of $\bP \in M_1(\R)$.
    Then
    \begin{enumerate}[(a)]
        \item $\phi(0) = 1$, $|\phi(t)| \le 1$, $\phi(-t) = \overline{\phi(t)}$
            and $\phi(\cdot )$ is continuous.
        \item $\phi$ is a \vocab{positive definite function},
            i.e.~
            \[\forall t_1,\ldots, t_n \in \R, (c_1,\ldots,c_n) \in \C^n ~ \sum_{j,k = 1}^n c_j \overline{c_k} \phi(t_j - t_k) \ge  0
            \]
            Equivalently, the matrix $(\phi(t_j- t_k))_{j,k}$ is positive semi-definite.
    \end{enumerate}
\end{theorem}
\begin{refproof}{thm:lec_10thm5}
    Part (a) is obvious.

    For part (b) we have:
    \begin{IEEEeqnarray*}{rCl}
        \sum_{j,k}  c_j \overline{c_k} \phi(t_j - t_k) &=& \sum_{j,k}  c_j \overline{c_k} \int_\R e^{\i (t_j - t_k) x} \bP(\dif x)\\
                                                       &=& \int_{\R} \sum_{j,k}  c_j \overline{c_k} e^{\i t_j x} \overline{e^{\i t_k x}} \bP(\dif x)\\
                                                       &=& \int_{\R}\sum_{j,k}  c_j e^{\i t_j x} \overline{c_k e^{\i t_k x}} \bP(\dif x)\\
                                                       &=& \int_{\R} \left| \sum_{l}  c_l e^{\i t_l x}\right|^2 \bP(\dif x) \ge  0
    \end{IEEEeqnarray*}
\end{refproof}
\begin{theorem}[Bochner's theorem]
    \yalabel{Bochner's Theorem for Positive Definite Functions}{Bochner's Theorem}{thm:bochner}%
    The converse to \yaref{thm:lec_10thm5} holds, i.e.~any
    $\phi: \R \to  \C$ satisfying (a) and (b) of \yaref{thm:lec_10thm5}
    must be the Fourier transform of a probability measure $\bP$
    on $(\R, \cB(\R))$.
\end{theorem}
Unfortunately, we won't prove \yaref{thm:bochner} in this lecture.


\begin{definition}[Convergence in distribution / weak convergence]
    \label{def:weakconvergence}
    We say that $\bP_n \in M_1(\R)$ \vocab[Convergence!weak]{converges weakly} towards $\bP \in M_1(\R)$ (notation: $\bP_n \implies \bP$), iff
    \[
    \forall f \in C_b(\R)~  \int f \dif\bP_n \to \int f \dif\bP,
    \]
    where
    \[
    C_b(\R) \coloneqq  \{ f: \R \to  \R \text{ continuous and bounded}\}.
    \]

    In analysis, this is also known as  $\text{weak}^\ast$ convergence.
\end{definition}
\begin{remark}
    This notion of convergence makes $M_1(\R)$ a separable metric space.
    We can construct a metric on $M_1(\R)$ that turns $M_1(\R)$ into a complete
    and separable metric space:

    Consider the sets
    \[
    \{\mu \in M_1(\R): \forall i=1,\ldots,n ~ \left|\int f_i \dif\mu - \int f_i \dif\bP\right| < \epsilon \}
    \]
    for any $\bP \in M_1(\R)$, $f_1,\ldots, f_n \in C_b(\R)$ and $\epsilon > 0$.
    These sets form a basis for the topology on $M_1(\R)$.
    More of this will follow later.
\end{remark}
\begin{example}
    \begin{itemize}
        \item Let $\bP_n = \delta_{\frac{1}{n}}$.
            Then $\int f \dif\bP_n = f(\frac{1}{n}) \to f(0) = \int f \dif\delta_0$
            for any continuous, bounded function $f$.
            Hence $\bP_n \implies \delta_0$.
        \item $\bP_n \coloneqq  \delta_n$ does not converge weakly,
            as for example
            \[
            \int \cos(\pi x) \dif\bP_n(x)
            \]
            does not converge.

        \item $\bP_n \coloneqq  \frac{1}{n} \delta_n + (1- \frac{1}{n}) \delta_0$.
            Let $f \in C_b(\R)$ arbitrary.
            Then
            \[
            \int f \dif\bP_n = \frac{1}{n} f(n) + (1 - \frac{1}{n}) f(0) \to f(0)
            \]
            since $f$ is bounded.
            Hence $\bP_n \implies \delta_0$.
        \item Let $\bP_n$ be the probability measure with Lebesgue density $\frac{1}{\sqrt{2 \pi n}} e^{-\frac{x^2}{2n}}$.
            This ``converges'' towards the $0$-measure,
            which is not a probability measure.
            Hence $\bP_n$ does not converge weakly.
            (Exercise) % TODO
    \end{itemize}
\end{example}
\begin{definition}
    We say that a sequence of random variables $X_n$
     \vocab[Convergence!in distribution]{converges in distribution}
     to $X$ (notation: $X_n \xrightarrow{\text{d}} X$), iff
     $\bP_n \implies \bP$, where $\bP_n$ is the distribution of $X_n$
     and  $\bP$ is the distribution of $X$.
\end{definition}
It is easy to see that this is equivalent to $\bE[f(X_n)] \to \bE[f(X)]$
for all $f \in  C_b(\R)$.
\begin{example}
    Let $X_n \coloneqq  \frac{1}{n}$
    and $F_n$ the distribution function, i.e.~$F_n = \One_{[\frac{1}{n},\infty)}$.
    Then $\bP_n = \delta_{\frac{1}{n}} \implies \delta_0$
    which is the distribution of $X \equiv 0$.
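    But $F_n(0) = 0$ for all $n$ and $F(0) = 1$,
    where $F = \One_{[0,\infty)}$ denotes the distribution function of $X$,
    so $F_n(0) \centernot\to F(0)$.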
\end{example}
\begin{theorem} % Theorem 1
    \label{lec10_thm1}
    $X_n \xrightarrow{\text{d}} X$ iff
    $F_n(t) \to  F(t)$ for all continuity points $t$ of $F$.
\end{theorem}
% \begin{proof}\footnote{This proof was not done in the lecture,
%     but can be found in the official notes from lecture 13}
%     ``$\implies$''
%     Suppose $\mu_n \implies \mu$.
%     Let $F_n$ and $F$ denote the respective distribution functions.
%     Fix a continuity point $x_0 \in \R$ of $F$.
%     We'll show
%     \[
%     \limsup_{n \to \infty} F_n(x_0) \le  F(x_0) + \epsilon
%     \]
%     and
%     \[
%     \liminf_{n \to \infty} F_n(x_0) \ge  F(x_0) - \epsilon
%     \]
%     for all $\epsilon > 0$.
%     Fix some $\epsilon > 0$.
%     Choose $\delta > 0$ such that $F(x_0 + \delta) < F(x_0) + \epsilon$
%     and define
%     \[
%     g(x) \coloneqq \begin{cases}
%         1 &\text{if }  x \le x_0,\\
%         1 - \frac{1}{\delta}(x - x_0)&
%             \text{if } x \in (x_0, x_0 + \delta],\\
%         0 &\text{if } x \ge x_0 + \delta.
%     \end{cases}
%     \]
%     Since $g$ is continuous and bounded, we have
%     \[
%     \int g \dif \mu_n \to  \int g \dif \mu.
%     \]
%     It is clear that $\One_{(-\infty, x_0]} \le g$.
%     Hence
%     \[
%         F_n(x_0) = \int \One_{(-\infty, x_0]} \dif \mu_n \le \int g \dif \mu_n.
%     \]
%     It follows that
%     \begin{IEEEeqnarray*}{rCl}
%     \limsup_{n} F_n(x_0)
%     &\le& \limsup_n \int g \dif \mu_n\\
%     &=& \lim_n \int g \dif \mu_n\\
%     &=& \int g \dif \mu\\
%     &\overset{g \le  \One_{(-\infty, x_0 + \delta]}}{\le}& F(x_0 + \delta)\\
%     &<& F(x_0) + \epsilon.
%     \end{IEEEeqnarray*}
%     The assertion about $\liminf_{n \to \infty} F_n(x_0)$
%     follows by a similar argument.
%
%     ``$\impliedby$''
%     Assume that $F_n(x) \to  F(x)$ at all continuity points of $F$.
%     We need to show
%     \[
%     \forall g \in C_b(\R) .~\int g \dif \mu_n \to \int g \dif \mu.
%     \]
%     Let $C$ denote the set of continuity points of $F$.
%     We apply measure theoretic induction:
%     \begin{itemize}
%         \item For $g = \One_{(a,b]}$, $a< b \in C$,
%         we have
%         \[\int g \dif \mu_n = F_n(b) - F_n(a) \to F(b) - F(a) = \int g  \dif \mu.\]
%         \item For $g = \sum_{i} \alpha_i \One_{(a_i, b_i]}$,
%             $a_i < b_i \in C$,
%             we get $\int g \dif \mu_n \to  \int g \dif \mu$
%             by the same argument.
%         \item % TODO continue from Lec13 page 21 (iii)
%     \end{itemize}
%
% \end{proof}
\begin{theorem}[Levy's continuity theorem]
    \yalabel{Levy's Continuity Theorem}{Levy}{levycontinuity}
    % Theorem 2
    $X_n \xrightarrow{\text{d}} X$ iff
    $\phi_{X_n}(t) \to \phi_X(t)$ for all $t \in \R$.
\end{theorem}
We will assume these two theorems for now and derive the central limit theorem.
The theorems will be proved later.