lectures 1 - 10

This commit is contained in:
Josia Pietsch 2023-05-10 18:56:36 +02:00
parent dca8a1b435
commit 94787468d1
Signed by untrusted user who does not match committer: josia
GPG key ID: E70B571D66986A2D
15 changed files with 1170 additions and 146 deletions

20
inputs/intro.tex Normal file
View file

@ -0,0 +1,20 @@
These are my notes on the lecture Probability Theory, taught by Prof.~\textsc{Chiranjib Mukherjee}
in the summer term 2023 at the University of Münster.
\begin{warning}
This is not an official script.
The official lecture notes can be found on the Learnweb.
\end{warning}
These notes contain errors almost surely.
If you find some error or want to improve something, send me a message: \texttt{probabilitytheory@jrpie.de}.
Topics of this lecture:
\begin{enumerate}[(1)]
\item Limit theorems: Laws of large numbers and the central limit theorem for i.i.d.~sequences,
\item Conditional expectation and conditional probabilities,
\item Martingales,
\item Markov chains.
\end{enumerate}

95
inputs/lecture_1.tex Normal file
View file

@ -0,0 +1,95 @@
% Lecture 1 - 2023-04-04
First, let us recall some basic definitions:
\begin{definition}
A \vocab{probability space} is a triplet $(\Omega, \cF, \bP)$,
such that
\begin{itemize}
\item $\Omega \neq \emptyset$,
\item $\cF$ is a $\sigma$-algebra over $\Omega$, i.e.~$\cF \subseteq \cP(\Omega)$ and
\begin{itemize}
\item $\emptyset, \Omega \in \cF$,
\item $A \in \cF \implies A^c \in \cF$,
\item $A_1, A_2,\ldots \in \cF \implies \bigcup_{i \in \N} A_i \in \cF$.
\end{itemize}
The elements of $\cF$ are called \vocab[Event]{events}.
\item $\bP$ is a \vocab{probability measure}, i.e.~$\bP$ is a function $\bP: \cF \to [0,1]$
such that
\begin{itemize}
\item $\bP(\emptyset) = 0$, $\bP(\Omega) = 1$,
\item $\bP\left( \bigsqcup_{n \in \N} A_n \right) = \sum_{n \in \N} \bP(A_n)$
for mutually disjoint $A_n \in \cF$.
\end{itemize}
\end{itemize}
\end{definition}
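For concreteness, a standard example (not part of the lecture): a fair die can be modelled by
\[
\Omega = \{1,\ldots,6\}, \qquad \cF = \cP(\Omega), \qquad \bP(A) = \frac{|A|}{6} \text{ for } A \in \cF,
\]
and all of the above axioms are immediate to verify.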
\begin{definition}
A \vocab{random variable} $X : (\Omega, \cF) \to (\R, \cB(\R))$
is a measurable function, i.e.~for all $B \in \cB(\R)$ we have $X^{-1}(B) \in \cF$.
(Equivalently $X^{-1}\left( (a,b] \right) \in \cF$ for all $a < b \in \R$ ).
\end{definition}
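As a quick illustration (my own, not from the lecture): for any $A \in \cF$ the indicator $X = \One_A$ is a random variable, since for all $a < b$
\[
X^{-1}\left( (a,b] \right) \in \{\emptyset, A, A^c, \Omega\} \subseteq \cF.
\]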
\begin{definition}
$F: \R \to \R_+$ is a \vocab{distribution function} iff
\begin{itemize}
\item $F$ is monotone non-decreasing,
\item $F$ is right-continuous,
\item $\lim_{x \to -\infty} F(x) = 0$ and $\lim_{x \to \infty} F(x) = 1$.
\end{itemize}
\end{definition}
\begin{fact}
Let $\bP$ be a probability measure on $(\R, \cB(\R))$.
Then $F(x) \coloneqq\bP\left( (-\infty, x] \right)$
is a probability distribution function.
(See lemma 2.4.2 in the lecture notes of Stochastik)
\end{fact}
The converse to this fact is also true:
\begin{theorem}[Kolmogorov's existence theorem / basic existence theorem of probability theory]
\label{kolmogorovxistence}
Let $\cF(\R)$ be the set of all distribution functions on $\R$
and let $\cM(\R)$ be the set of all probability measures on $\R$.
Then there is a one-to-one correspondence between $\cF(\R)$ and $\cM(\R)$
given by
\begin{IEEEeqnarray*}{rCl}
\cM(\R) &\longrightarrow & \cF(\R)\\
\bP &\longmapsto & \begin{pmatrix*}[l]
\R &\longrightarrow & \R_+ \\
x &\longmapsto & \bP((-\infty, x]).
\end{pmatrix*}
\end{IEEEeqnarray*}
\end{theorem}
\begin{proof}
See theorem 2.4.3 in Stochastik.
\end{proof}
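For instance (a standard example, not from the script), under this correspondence the Dirac measure $\delta_0$ corresponds to
\[
F(x) = \delta_0\left( (-\infty, x] \right) = \One_{[0,\infty)}(x),
\]
which is indeed monotone, right-continuous and has the correct limits at $\pm\infty$.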
\begin{example}[Some important probability distribution functions]\hfill
\begin{enumerate}[(1)]
\item \vocab{Uniform distribution} on $[0,1]$:
\[
F(x) = \begin{cases}
0 & x \in (-\infty, 0],\\
x & x \in (0,1],\\
1 & x \in (1,\infty).\\
\end{cases}
\]
\item \vocab{Exponential distribution} with parameter $\lambda > 0$:
\[
F(x) = \begin{cases}
1 - e^{-\lambda x} & x \ge 0,\\
0 & x < 0.
\end{cases}
\]
\item \vocab{Gaussian distribution}:
\[
\Phi(x) \coloneqq \frac{1}{\sqrt{2\pi}} \int_{-\infty}^x e^{-\frac{y^2}{2}} dy.
\]
\item $\bP[X = 1] = \bP[X = -1] = \frac{1}{2}$ :
\[
F(x) = \begin{cases}
0 & x \in (-\infty, -1),\\
\frac{1}{2} & x \in [-1,1),\\
1 & x \in [1, \infty).
\end{cases}
\]
\end{enumerate}
\end{example}

259
inputs/lecture_10.tex Normal file
View file

@ -0,0 +1,259 @@
% lecture 10 - 2023-05-09
% RECAP
First, we will prove some of the most important facts about Fourier transforms.
We consider $(\R, \cB(\R))$.
\begin{notation}
By $M_1 (\R)$ we denote the set of all probability measures on $\left( \R, \cB(\R) \right)$.
\end{notation}
For all $\bP \in M_1(\R)$ we define $\phi_{\bP}(t) = \int_{\R} e^{\i t x}d\bP(x)$.
If $X: (\Omega, \cF) \to (\R, \cB(\R))$ is a random variable, we write
$\phi_X(t) \coloneqq \bE[e^{\i t X}] = \phi_{\mu}(t)$,
where $\mu = \bP X^{-1}$.
\begin{refproof}{inversionformula}
We will prove that the limit in the RHS of \autoref{invf}
exists and is equal to the LHS.
Note that the term on the RHS is integrable, as
\[
\lim_{t \to 0} \frac{e^{-\i t b} - e^{-\i t a}}{- \i t} \phi(t) = b - a,
\]
since $\phi(0) = 1$ and $|\phi(t)| \le 1$; in particular the integrand is bounded near $t = 0$.
% TODO think about this
We have
\begin{IEEEeqnarray*}{rCl}
&&\lim_{T \to \infty} \frac{1}{2 \pi} \int_{-T}^T \int_{\R} \frac{e^{-\i t b}- e^{-\i t a}}{-\i t} e^{\i t x} d \bP(x) \, dt\\
&\overset{\text{Fubini for $L^1$}}{=}& \lim_{T \to \infty} \frac{1}{2 \pi} \int_{\R} \int_{-T}^T \frac{e^{-\i t b}- e^{-\i t a}}{-\i t} e^{\i t x} \, dt \, d \bP(x)\\
&=& \lim_{T \to \infty} \frac{1}{2 \pi} \int_{\R} \int_{-T}^T \frac{e^{\i t (x-b)}- e^{\i t (x-a)}}{-\i t} \, dt \, d \bP(x)\\
&=& \lim_{T \to \infty} \frac{1}{2 \pi} \int_{\R} \underbrace{\int_{-T}^T \frac{\cos(t (x-b)) - \cos(t(x-a))}{-\i t} \, dt}_{=0 \text{, as the integrand is odd in $t$}} d \bP(x)
\\&&
+ \lim_{T \to \infty} \frac{1}{2\pi} \int_{\R}\int_{-T}^T \frac{\sin(t ( x - b)) - \sin(t(x-a))}{-t} \, dt \, d\bP(x)\\
&=& \lim_{T \to \infty} \frac{1}{\pi} \int_\R \int_{0}^T \frac{\sin(t(x-a)) - \sin(t(x-b))}{t} \, dt \, d\bP(x)\\
&\overset{\text{\autoref{fact:intsinxx}, dominated convergence}}{=}& \frac{1}{\pi} \int_{\R} \left( -\frac{\pi}{2} \One_{x < a} + \frac{\pi}{2} \One_{x > a } \right)
- \left( - \frac{\pi}{2} \One_{x < b} + \frac{\pi}{2} \One_{x > b} \right) d\bP(x)\\
&=& \frac{1}{2} \bP(\{a\} ) + \frac{1}{2} \bP(\{b\}) + \bP((a,b))\\
&=& \frac{F(b) + F(b-)}{2} - \frac{F(a) + F(a-)}{2}.
\end{IEEEeqnarray*}
\end{refproof}
\begin{fact}
\label{fact:intsinxx}
\[
\int_0^\infty \frac{\sin x}{x} dx = \frac{\pi}{2}
\]
where the LHS is an improper Riemann-integral.
Note that the LHS is not Lebesgue-integrable.
It follows that
\begin{IEEEeqnarray*}{rCl}
\lim_{T \to \infty} \int_0^T \frac{\sin(t(x-a))}{t} dt &=&
\begin{cases}
- \frac{\pi}{2}, &x < a,\\
0, &x = a,\\
\frac{\pi}{2}, & x > a.
\end{cases}
\end{IEEEeqnarray*}
\end{fact}
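The case distinction can be obtained from the first statement by substitution (a short verification, not carried out in the lecture): for $x > a$,
\[
\int_0^T \frac{\sin(t(x-a))}{t} dt \overset{u = t(x-a)}{=} \int_0^{T(x-a)} \frac{\sin u}{u} du \xrightarrow{T \to \infty} \frac{\pi}{2};
\]
for $x < a$ we have $\sin(t(x-a)) = -\sin(t(a-x))$, giving the limit $-\frac{\pi}{2}$, and for $x = a$ the integral is $0$.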
\begin{theorem} % Theorem 3
\label{thm:lec10_3}
Let $\bP \in M_1(\R)$ such that $\phi_\bP \in L^1(\lambda)$.
Then $\bP$ has a continuous probability density given by
\[
f(x) = \frac{1}{2 \pi} \int_{\R} e^{-\i t x} \phi_{\bP}(t) \, dt.
\]
\end{theorem}
\begin{example}
\begin{itemize}
\item Let $\bP = \delta_{\{0\}}$.
Then
\[
\phi_{\bP}(t) = \int e^{\i t x} d \delta_0(x) = e^{\i t \cdot 0 } = 1.
\]
\item Let $\bP = \frac{1}{2} \delta_1 + \frac{1}{2} \delta_{-1}$.
Then
\[
\phi_{\bP}(t) = \frac{1}{2} e^{\i t} + \frac{1}{2} e^{- \i t} = \cos(t).
\]
\end{itemize}
\end{example}
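In both cases $\phi_{\bP} \notin L^1(\lambda)$, so \autoref{thm:lec10_3} does not apply; indeed, these purely atomic measures have no density. As a further illustration (my own computation, not from the lecture), for the uniform distribution on $[0,1]$ one gets
\[
\phi_{\bP}(t) = \int_0^1 e^{\i t x} dx = \frac{e^{\i t} - 1}{\i t} \quad (t \neq 0), \qquad \phi_{\bP}(0) = 1.
\]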
\begin{refproof}{thm:lec10_3}
Let $f(x) \coloneqq \frac{1}{2 \pi} \int_{\R} e^{ - \i t x} \phi(t) dt$.
\begin{claim}
If $x_n \to x$, then $f(x_n) \to f(x)$.
\end{claim}
\begin{subproof}
We have $e^{-\i t x_n} \phi(t) \xrightarrow{n \to \infty} e^{-\i t x } \phi(t)$ for all $t$.
Moreover,
\[
|e^{-\i t x_n} \phi(t)| \le |\phi(t)|
\]
and $\phi \in L^1$, hence $f(x_n) \to f(x)$
by the dominated convergence theorem.
\end{subproof}
We'll show that for all $a < b$ we have
\[
\bP\left( (a,b] \right) = \int_a^b f(x) dx.\label{thm10_3eq1}
\]
Let $F$ be the distribution function of $\bP$.
It is enough to prove \autoref{thm10_3eq1}
for all continuity points $a $ and $ b$ of $F$.
We have
\begin{IEEEeqnarray*}{rCl}
RHS &\overset{\text{Fubini}}{=}& \frac{1}{2 \pi} \int_{\R} \int_{a}^b e^{-\i t x} \phi(t) dx dt\\
&=& \frac{1}{2 \pi} \int_\R \phi(t) \int_a^b e^{-\i t x} dx dt\\
&=& \frac{1}{2\pi} \int_{\R} \phi(t) \left( \frac{e^{-\i t b} - e^{-\i t a}}{- \i t} \right) dt\\
&\overset{\text{dominated convergence}}{=}& \lim_{T \to \infty} \frac{1}{2\pi} \int_{-T}^{T} \phi(t) \left( \frac{e^{-\i t b} - e^{- \i t a}}{- \i t} \right) dt
\end{IEEEeqnarray*}
By \autoref{inversionformula}, the RHS is equal to $F(b) - F(a) = \bP\left( (a,b] \right)$.
\end{refproof}
However, Fourier analysis is not only useful for continuous probability density functions:
\begin{theorem}[Bochner's formula for the mass at a point]\label{bochnersformula} % Theorem 4
Let $\bP \in M_1(\R)$.
Then
\[
\forall x \in \R ~ \bP\left( \{x\} \right) = \lim_{T \to \infty} \frac{1}{2 T} \int_{-T}^T e^{-\i t x } \phi(t) dt.
\]
\end{theorem}
\begin{refproof}{bochnersformula}
We have
\begin{IEEEeqnarray*}{rCl}
RHS &=& \lim_{T \to \infty} \frac{1}{2 T} \int_{-T}^T e^{-\i t x} \int_{\R} e^{\i t y} d \bP(y) \, dt \\
&\overset{\text{Fubini}}{=}& \lim_{T \to \infty} \frac{1}{2 T} \int_\R \bP(dy) \int_{-T}^T \underbrace{e^{\i t (y - x)}}_{\cos(t ( y - x)) + \i \sin(t (y-x))} dt\\
&=& \lim_{T \to \infty} \frac{1}{2T} \int_{\R} d\bP(y) \int_{-T}^T \cos(t(y - x)) dt\\
&=& \lim_{T \to \infty} \int_{\R} \frac{\sin(T (y-x))}{T (y-x)} d \bP(y),
\end{IEEEeqnarray*}
where the integrand is to be read as $1$ for $y = x$.
Furthermore
\[
\lim_{T \to \infty} \frac{\sin(T(y-x))}{T (y- x)} = \begin{cases}
1, &y = x,\\
0, &y \neq x.
\end{cases}
\]
Hence, by dominated convergence (the integrand is bounded by $1$ in absolute value),
\begin{IEEEeqnarray*}{rCl}
\lim_{T \to \infty} \int_{\R} \frac{\sin(T (y-x))}{T (y-x)} d \bP(y) &=& \bP\left( \{x\}\right).
\end{IEEEeqnarray*}
\end{refproof}
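As a sanity check (my own, not carried out in the lecture), apply \autoref{bochnersformula} to $\bP = \frac{1}{2} \delta_1 + \frac{1}{2} \delta_{-1}$ with $\phi(t) = \cos(t)$ and $x = 1$:
\[
\frac{1}{2T} \int_{-T}^T e^{-\i t} \cos(t) dt
= \frac{1}{2T} \int_{-T}^T \cos^2(t) dt - \frac{\i}{2T} \int_{-T}^T \sin(t)\cos(t) dt
\xrightarrow{T \to \infty} \frac{1}{2} = \bP(\{1\}).
\]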
\begin{theorem} % Theorem 5
\label{thm:lec_10thm5}
Let $\phi$ be the characteristic function of $\bP \in M_1(\R)$.
Then
\begin{enumerate}[(a)]
\item $\phi(0) = 1$, $|\phi(t)| \le 1$ and $\phi(\cdot )$ is continuous.
\item $\phi$ is a \vocab{positive definite function},
i.e.~
\[\forall t_1,\ldots, t_n \in \R, (c_1,\ldots,c_n) \in \C^n ~ \sum_{j,k = 1}^n c_j \overline{c_k} \phi(t_j - t_k) \ge 0
\]
(equivalently, the matrix $(\phi(t_j- t_k))_{j,k}$ is positive semi-definite).
\end{enumerate}
\end{theorem}
\begin{refproof}{thm:lec_10thm5}
Part (a) follows directly from the definition: $\phi(0) = \int 1 \, d\bP = 1$, $|\phi(t)| \le \int |e^{\i t x}| \, d\bP(x) = 1$, and continuity of $\phi$ follows from dominated convergence.
For part (b) we have:
\begin{IEEEeqnarray*}{rCl}
\sum_{j,k} c_j \overline{c_k} \phi(t_j - t_k) &=& \sum_{j,k} c_j \overline{c_k} \int_\R e^{\i (t_j - t_k) x} d \bP(x)\\
&=& \int_{\R} \sum_{j,k} c_j \overline{c_k} e^{\i t_j x} \overline{e^{\i t_k x}} d\bP(x)\\
&=& \int_{\R}\sum_{j,k} c_j e^{\i t_j x} \overline{c_k e^{\i t_k x}} d\bP(x)\\
&=& \int_{\R} \left| \sum_{l} c_l e^{\i t_l x}\right|^2 d\bP(x) \ge 0.
\end{IEEEeqnarray*}
\end{refproof}
\begin{theorem}[Bochner's theorem]\label{bochnersthm}
The converse to \autoref{thm:lec_10thm5} holds, i.e.~
any $\phi: \R \to \C$ satisfying (a) and (b) of \autoref{thm:lec_10thm5}
must be the Fourier transform of a probability measure $\bP$
on $(\R, \cB(\R))$.
\end{theorem}
Unfortunately, we won't prove \autoref{bochnersthm} in this lecture.
\begin{definition}[Convergence in distribution / weak convergence]
We say that a sequence $(\bP_n)_{n \in \N} \subseteq M_1(\R)$ \vocab[Convergence!weak]{converges weakly} towards $\bP \in M_1(\R)$ (notation: $\bP_n \implies \bP$), iff
\[
\forall f \in C_b(\R)~ \int f d\bP_n \to \int f d\bP.
\]
where
\[
C_b(\R) \coloneqq \{ f: \R \to \R \text{ continuous and bounded}\}
\]
In analysis, this is also known as $\text{weak}^\ast$ convergence.
\end{definition}
\begin{remark}
This notion of convergence makes $M_1 (\R)$ a separable metric space: one can construct a metric on $M_1(\R)$ that turns it into a complete
and separable metric space.
Consider the sets
\[
\left\{ \Q \in M_1(\R): \forall i=1,\ldots,n ~ \left| \int f_i \, d\Q - \int f_i \, d\bP \right| < \epsilon \right\}
\]
for $\bP \in M_1(\R)$, $\epsilon > 0$ and $f_1,\ldots, f_n \in C_b(\R)$.
These sets form a basis for the topology of weak convergence on $M_1(\R)$.
More of this will follow later.
\end{remark}
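One concrete choice of such a metric (mentioned here for completeness; it is not given in the lecture) is the Lévy-Prokhorov metric
\[
d(\bP, \Q) \coloneqq \inf \left\{ \epsilon > 0 : \bP(A) \le \Q(A^\epsilon) + \epsilon \text{ and } \Q(A) \le \bP(A^\epsilon) + \epsilon \text{ for all } A \in \cB(\R) \right\},
\]
where $A^\epsilon \coloneqq \{x \in \R : \exists y \in A ~ |x-y| < \epsilon\}$ is the open $\epsilon$-neighbourhood of $A$.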
\begin{example}
\begin{itemize}
\item Let $\bP_n = \delta_{\frac{1}{n}}$.
Then $\int f d \bP_n = f(\frac{1}{n}) \to f(0) = \int f d \delta_0$
for any continuous, bounded function $f$.
Hence $\bP_n \implies \delta_0$.
\item $\bP_n \coloneqq \delta_n$ does not converge weakly,
as for example
\[
\int \cos(\pi x) d\bP_n(x) = \cos(\pi n) = (-1)^n
\]
does not converge.
\item $\bP_n \coloneqq \frac{1}{n} \delta_n + (1- \frac{1}{n}) \delta_0$.
Let $f \in C_b(\R)$ arbitrary.
Then
\[
\int f d\bP_n = \frac{1}{n} f(n) + (1 - \frac{1}{n}) f(0) \to f(0)
\]
since $f$ is bounded.
Hence $\bP_n \implies \delta_0$.
\item Let $\bP_n$ be the probability measure with density $x \mapsto \frac{1}{\sqrt{2 \pi n}} e^{-\frac{x^2}{2n}}$, i.e.~$\bP_n = \cN(0,n)$.
This ``converges'' towards the $0$-measure, which is not a probability measure. Hence $\bP_n$ does not converge weakly.
(Exercise) % TODO
\end{itemize}
\end{example}
\begin{definition}
We say that a sequence of random variables $X_n$
\vocab[Convergence!in distribution]{converges in distribution}
to $X$ (notation: $X_n \xrightarrow{\text{dist}} X$), iff
$\bP_n \implies \bP$, where $\bP_n$ is the distribution of $X_n$
and $\bP$ is the distribution of $X$.
\end{definition}
\begin{example}
Let $X_n \coloneqq \frac{1}{n}$
and $F_n$ the distribution function, i.e.~$F_n = \One_{[\frac{1}{n},\infty)}$.
Then $\bP_n = \delta_{\frac{1}{n}} \implies \delta_0$,
which is the distribution of $X \equiv 0$.
But $F_n(0) = 0$ for all $n$, while $F(0) = 1$, so $F_n(0) \centernot\to F(0)$.
\end{example}
\begin{theorem}
$X_n \xrightarrow{\text{dist}} X$ iff
$F_n(t) \to F(t)$ for all continuity points $t$ of $F$.
\end{theorem}
\begin{theorem}[Levy's continuity theorem]\label{levycontinuity}
$X_n \xrightarrow{\text{dist}} X$ iff
$\phi_{X_n}(t) \to \phi_X(t)$ for all $t \in \R$.
\end{theorem}
We will assume these two theorems for now and derive the central limit theorem.
The theorems will be proved later.
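A minimal consistency check (my own, assuming \autoref{levycontinuity}): for $\bP_n = \delta_{\frac{1}{n}}$, which we have already seen to converge weakly to $\delta_0$,
\[
\phi_{\bP_n}(t) = \int_{\R} e^{\i t x} d\delta_{\frac{1}{n}}(x) = e^{\frac{\i t}{n}} \xrightarrow{n \to \infty} 1 = \phi_{\delta_0}(t) \qquad \text{for all } t \in \R.
\]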

View file

@ -1,13 +1,7 @@
$(\Omega, \cF, \bP)$ Probability Space, $X : ( \Omega, \cF) \to (\R, \cB(\R))$ random variable.
Then $\Q(\cdot) = \bP [ x\in \cdot ]$ is the distribution of $X$ under $\bP$.
\section{Independence and product measures}
In order to define the notion of independence, we first need to construct
product measures in order to be able to consider several random variables
at the same time.
product measures.
The finite case of a product is straightforward:
\begin{theorem}[Product measure (finite)]
@ -44,37 +38,47 @@ We now want to construct a product measure for infinite products.
\begin{example}
Suppose we throw a die twice. Let $A \coloneqq \{\text{first throw even}\}$,
$B \coloneqq \{second throw even\}$
$B \coloneqq \{\text{second throw even}\}$
and $C \coloneqq \{\text{sum even}\} $.
Are $\One_A, \One_B, \One_C$ mutually independent random variables?
It is easy the see, that the random variables are pairwise independent,
but not mutually independent.
\end{example}
It is easy to see that the random variables are pairwise independent,
but not mutually independent.
The definition of mutual independence can be rephrased as follos:
Let $X_1, X_2, \ldots, X_n$ r.v.s. Let $\bP[(X_1,\ldots, X_n) \in \cdot ] \text{\reflectbox{$\coloneqq$}} \Q^{\otimes}(\cdot )$.
\begin{definition}
Let $(\Omega, \cF, \bP)$ be a probability space
and $X : ( \Omega, \cF) \to (\R, \cB(\R))$ a random variable.
Then $\Q(\cdot) \coloneqq \bP [ X \in \cdot ]$ is called the \vocab{distribution}
of $X$ under $\bP$.
\end{definition}
Let $X_1, \ldots, X_n$ be random variables and $\Q^{\otimes}(\cdot ) \coloneqq \bP[(X_1,\ldots, X_n) \in \cdot ]$
their \vocab{joint distribution}.
Then $\Q^{\otimes}$ is a probability measure on $\R^n$.
The definition of mutual independence can be rephrased as follows:
\begin{fact}
$X_1,\ldots, X_n$ are mutually independent iff $\Q^{\otimes} = \Q_1 \otimes \ldots \otimes \Q_n$.
$X_1,\ldots, X_n$ are mutually independent iff $\Q^{\otimes} = \Q_1 \otimes \ldots \otimes \Q_n$,
where $\Q_i$ is the distribution of $X_i$.
In this setting, $\Q_i$ is called the \vocab{marginal distribution} of $X_i$.
\end{fact}
By constructing an infinite product, we can thus extend the notion of independence
to an infinite number of r.v.s.
to an infinite number of random variables.
\begin{goal}
Can we construct infinitely many independent random variables?
\end{goal}
\begin{definition}[Consistent family of random variables]
\label{def:consistentfamily}
Let $\bP_n, n \in \N$ be a family of probability measures on $(\R^n, \cB(\R^n))$.
The family is called \vocab{consistent} if if
The family is called \vocab{consistent} if
$\bP_{n+1}[B_1 \times B_2 \times \ldots \times B_n \times \R] = \bP_n[B_1 \times \ldots \times B_n]$
for all $n \in \N, B_i \in \cB(\R)$.
\end{definition}
\begin{theorem}[Kolmogorov extension / consistency theorem]
\label{thm:kolmogorovconsistency}
Informally:
``Probability measures are determined by finite-dimensional marginals
(as long as these marginals are nice)''
@ -96,8 +100,7 @@ to an infinite number of r.v.s.
index sets. However this requires a different notion of consistency.
\end{remark}
\begin{example}of a consistent family:
\begin{example}[A consistent family]
Let $F_1, \ldots, F_n$ be probability distribution functions
and let $\bP_n$ be the probability measure on $\R^n$ defined
by

View file

@ -1,3 +1,4 @@
\todo{Lecture 3 needs to be finished}
\begin{notation}
Let $\cB_n$ denote $\cB(\R^n)$.
\end{notation}
@ -19,7 +20,7 @@ First we need to define $\cB_{\infty}$.
This $\sigma$-algebra must contain all sets $\prod_{n \in \N} B_n$
for all $B_n \in \cB_1$. We simply define $\cB_{\infty}$ to be the
$\sigma$-algebra
Let $\cB_\infty \coloneqq \sigma \left( \{\prod_{n \in \N} B_n | B_n \in \cB(\R)\} \right)$.
Let \[\cB_\infty \coloneqq \sigma \left( \left\{\prod_{n \in \N} B_n : B_n \in \cB(\R)\right\} \right).\]
\begin{question}
What is there in $\cB_\infty$?
Can we identify sets in $\cB_\infty$ for which we can define the product measure
@ -41,25 +42,30 @@ Recall the following theorem from measure theory:
\label{caratheodory}
Suppose $\cA$ is an algebra (i.e.~closed under finite union)
and $\Omega \neq \emptyset$.
Suppose $\bP$ is countably additive on $\cA$ (i.e.~if $(A_n)_{n}$
Suppose $\bP$ is countably additive on $\cA$ (i.e.~if $(A_n)_{n}$
are pairwise disjoint and $\bigcup_{n \in \N} A_n \in \cA $
then $\bP\left( \bigcup_{n \in \N} A_n \right) = \sum_{n \in \N} \bP(A_n)$).
Then $\bP$ extends uniquely to a probability measure on $(\Omega, \cF)$,
where $\cF = \sigma(\cA)$.
\end{theorem}
Define $\cF = \bigcup_{n \in \N} \cF_n$. Check that $\cF$ is an algebra.
Define $\cF = \bigcup_{n \in \N} \cF_n$. Then $\cF$ is an algebra.
We'll show that if we define $\lambda: \cF \to [0,1]$ with
$\lambda(A) = \lambda_n(A)$ for any $n$ where this is well defined,
then $\lambda$ is countably additive on $\cF$.
Using \autoref{caratheodory} $\lambda$ will extend uniquely to a probability measure on $\sigma(\cF)$.
Using \autoref{caratheodory}, $\lambda$ will extend uniquely to a probability measure on $\sigma(\cF)$.
We want to prove:
\begin{enumerate}[(1)]
\item $\sigma(\cF) = \cB_\infty$,
\item $\lambda$ as defined above is countably additive on $\cF$.
\end{enumerate}
\begin{proof}[Proof of (1)]
\begin{claim}
\label{claim:sF=Binfty}
$\sigma(\cF) = \cB_\infty$.
\end{claim}
\begin{claim}
\label{claim:lambdacountadd}
$\lambda$ as defined above is countably additive on $\cF$.
\end{claim}
\begin{refproof}{claim:sF=Binfty}
Consider an infinite dimensional box $\prod_{n \in \N} B_n$.
We have
\[
@ -85,7 +91,7 @@ We want to prove:
Hence $\cF_n \subseteq \cB_\infty$ for all $n$,
thus $\cF \subseteq \cB_\infty$. Since $\cB_\infty$ is a $\sigma$-algebra,
$\sigma(\cF) \subseteq \cB_\infty$.
\end{proof}
\end{refproof}
We are going to use the following
\begin{fact}
\label{fact:finaddtocountadd}
@ -97,14 +103,13 @@ We are going to use the following
$\bP(B_n) \to 0$. Then $\bP$ must be countably additive.
\end{fact}
\begin{proof}
Exercise
Exercise. % TODO
\end{proof}
\begin{proof}[Proof of (2)]
Let's prove that $\lambda$ is finitely additive.
$\lambda(\R^\infty) = \lambda_1(\R^\infty) = 1$.
\begin{refproof}{claim:lambdacountadd}
Let us prove that $\lambda$ is finitely additive.
We have $\lambda(\R^\infty) = \lambda_1(\R^\infty) = 1$ and
$\lambda(\emptyset) = \lambda_1(\emptyset) = 0$.
Suppose $A_1, A_2 \in \cF$ are disjoint.
Suppose that $A_1, A_2 \in \cF$ are disjoint.
Then pick some $n$ such that $A_1, A_2 \in \cF_n$.
Take $C_1, C_2 \in \cB_n$ such that $C_1^\ast = A_1$
and $C_2^\ast = A_2$.
@ -113,7 +118,8 @@ We are going to use the following
by the definition of the finite product measure.
In order to use \autoref{fact:finaddtocountadd},
we need to show that if $B_n \in \cF$ with $B_n \to \emptyset \implies \lambda(B_n) \to 0$.
we need to show that for any sequence $B_n \in \cF$ with $B_n \xrightarrow{n\to \infty} \emptyset$
we have $\lambda(B_n) \xrightarrow{n \to \infty} 0$.
\todo{Finish this}
%TODO
\end{proof}
\end{refproof}

1
inputs/lecture_4.tex Normal file
View file

@ -0,0 +1 @@
\todo{Lecture 4 missing}

View file

@ -1,3 +1,7 @@
% Lecture 5 2023-04-21
\subsection{The laws of large numbers}
We want to show laws of large numbers:
the LHS is random and represents empirical averaging,
while the RHS is a constant, which can be computed explicitly from the distribution of the $X_i$.

View file

@ -1,3 +1,4 @@
\todo{Large parts of lecture 6 are missing}
\begin{refproof}{lln}
We want to deduce the SLLN (\autoref{lln}) from \autoref{thm2}.
W.l.o.g.~let us assume that $\bE[X_i] = 0$ (otherwise define $X'_i \coloneqq X_i - \bE[X_i]$).
@ -21,8 +22,7 @@
Thus $a_1 + \ldots + a_n = n S_n - (S1 $ % TODO
\end{subproof}
The claim implies SLLN.
The SLLN follows from the claim.
\end{refproof}
We need the following inequality:
@ -58,7 +58,7 @@ We need the following inequality:
\paragraph{Application of SLLN}
\subsubsection{Application: Renewal Theorem}
\begin{theorem}[Renewal theorem]
Let $X_1,X_2,\ldots$ be i.i.d.~random variables with $X_i \ge 0$ and $\bE[X_i] = m > 0$. The $X_i$ model waiting times.

179
inputs/lecture_7.tex Normal file
View file

@ -0,0 +1,179 @@
% TODO \begin{goal}
% TODO We want to drop our assumptions on finite mean or variance
% TODO and say something about the behaviour of $ \sum_{n \ge 1} X_n$
% TODO when the $X_n$ are independent.
% TODO \end{goal}
\begin{theorem}[Theorem 3, Kolmogorov's three-series theorem] % Theorem 3
\label{thm3}
Let $X_n$ be a family of independent random variables.
\begin{enumerate}[(a)]
\item Suppose for some $C \ge 0$, the following three series
of numbers converge:
\begin{itemize}
\item $\sum_{n \ge 1} \bP(|X_n| > C)$,
\item $\sum_{n \ge 1} \underbrace{\int_{|X_n| \le C} X_n d\bP}_{\text{\vocab{truncated mean}}}$,
\item $\sum_{n \ge 1} \underbrace{\int_{|X_n| \le C} X_n^2 d\bP - \left( \int_{|X_n| \le C} X_n d\bP \right)^2}_{\text{\vocab{truncated variance} }}$.
\end{itemize}
Then $\sum_{n \ge 1} X_n$ converges almost surely.
\item Suppose $\sum_{n \ge 1} X_n$ converges almost surely.
Then all three series above converge for every $C > 0$.
\end{enumerate}
\end{theorem}
For the proof we'll need a slight generalization of \autoref{thm2}:
\begin{theorem}[Theorem 4] % Theorem 4
\label{thm4}
Let $\{X_n\}_n$ be independent and \vocab{uniformly bounded}
(i.e. $\exists M < \infty : \sup_n \sup_\omega |X_n(\omega)| \le M$).
Then $\sum_{n \ge 1} X_n$ converges almost surely
$\iff$ $\sum_{n \ge 1} \bE(X_n)$ and $\sum_{n \ge 1} \Var(X_n)$
converge.
\end{theorem}
\begin{refproof}{thm3}
Assume, that we have already proved \autoref{thm4}.
We prove part (a) first.
Put $Y_n = X_n \cdot \One_{\{|X_n| \le C\}}$.
Since the $X_n$ are independent, the $Y_n$ are independent as well.
Furthermore, the $Y_n$ are uniformly bounded.
By our assumption, the series
$\sum_{n \ge 1} \int_{|X_n| \le C} X_n d\bP = \sum_{n \ge 1} \bE[Y_n]$
and $\sum_{n \ge 1} \int_{|X_n| \le C} X_n^2 d\bP - \left( \int_{|X_n| \le C} X_n d\bP \right)^2 = \sum_{n \ge 1} \Var(Y_n)$
converge.
By \autoref{thm4} it follows that $\sum_{n \ge 1} Y_n < \infty$
almost surely.
Let $A_n \coloneqq \{\omega : |X_n(\omega)| > C\}$.
Since the first series $\sum_{n \ge 1} \bP(A_n) < \infty$,
by Borel-Cantelli, $\bP[\text{infinitely many $A_n$ occur}] = 0$.
Hence, almost surely, $X_n = Y_n$ for all but finitely many $n$,
so $\sum_{n \ge 1} X_n$ converges almost surely as well.
For the proof of (b), suppose $\sum_{n\ge 1} X_n(\omega) < \infty$
for almost every $\omega$.
Fix an arbitrary $C > 0$.
Define
\[
Y_n(\omega) \coloneqq \begin{cases}
X_n(\omega) & \text{if } |X_n(\omega)| \le C,\\
C &\text{if } |X_n(\omega)| > C.
\end{cases}
\]
Then the $Y_n$ are independent and $\sum_{n \ge 1} Y_n(\omega) < \infty$
almost surely and the $Y_n$ are uniformly bounded.
By \autoref{thm4} $\sum_{n \ge 1} \bE[Y_n]$ and $\sum_{n \ge 1} \Var(Y_n)$
converge.
Define
\[
Z_n(\omega) \coloneqq \begin{cases}
X_n(\omega) &\text{if } |X_n| \le C,\\
-C &\text{if } |X_n| > C.
\end{cases}
\]
Then the $Z_n$ are independent, uniformly bounded and $\sum_{n \ge 1} Z_n(\omega) < \infty$
almost surely.
By \autoref{thm4} we have
$\sum_{n \ge 1} \bE(Z_n) < \infty$
and $\sum_{n \ge 1} \Var(Z_n) < \infty$.
We have
\begin{IEEEeqnarray*}{rCl}
\bE(Y_n) &=& \int_{|X_n| \le C} X_n d \bP + C \bP(|X_n| > C),\\
\bE(Z_n) &=& \int_{|X_n| \le C} X_n d \bP - C \bP(|X_n| > C).
\end{IEEEeqnarray*}
Since $\bE(Y_n) + \bE(Z_n) = 2 \int_{|X_n| \le C} X_n d\bP$
and $\sum_{n \ge 1} \left( \bE(Y_n) + \bE(Z_n) \right)$ converges,
the second series converges;
since $\bE(Y_n) - \bE(Z_n) = 2 C \bP(|X_n| > C)$
and $\sum_{n \ge 1} \left( \bE(Y_n) - \bE(Z_n) \right)$ converges, the first series converges.
For the third series, we look at
$\sum_{n \ge 1} \Var(Y_n)$ and
$\sum_{n \ge 1} \Var(Z_n)$ to conclude that this series converges
as well.
\end{refproof}
Recall \autoref{thm2}.
We will see, that the converse of \autoref{thm2} is true if the $X_n$ are uniformly bounded.
More formally:
\begin{theorem}[Theorem 5]
\label{thm5}
Let $X_n$ be a sequence of independent random variables with mean $0$,
that are uniformly bounded.
If $\sum_{n \ge 1} X_n < \infty$ almost surely,
then $\sum_{n \ge 1} \Var(X_n) < \infty$.
\end{theorem}
\begin{refproof}{thm4}
Assume we have proven \autoref{thm5}.
``$\impliedby$'' Assume $\{X_n\} $ are independent, uniformly bounded
and $\sum_{n \ge 1} \bE(X_n) < \infty$ as well as $\sum_{n \ge 1} \Var(X_n) < \infty$.
We need to show that $\sum_{n \ge 1} X_n < \infty$ a.s.
Let $Y_n \coloneqq X_n - \bE(X_n)$.
Then the $Y_n$ are independent, $\bE(Y_n) = 0$ and $\Var(Y_n) = \Var(X_n)$.
By \autoref{thm2} $\sum_{n \ge 1} Y_n < \infty$ a.s.
Thus $\sum_{n \ge 1} X_n < \infty$ a.s.
``$\implies$'' We assume that $\{X_n\}$ are independent, uniformly bounded
and $\sum_{n \ge 1} X_n(\omega) < \infty$ a.s.
We have to show that $\sum_{n \ge 1} \bE(X_n) < \infty$
and $\sum_{n \ge 1} \Var(X_n) < \infty$.
Consider the product space $(\Omega, \cF, \bP) \otimes (\Omega, \cF, \bP)$.
On this product space, we define
$Y_n \left( (\omega, \omega') \right) \coloneqq X_n(\omega)$
and $Z_n \left( (\omega, \omega') \right) \coloneqq X_n(\omega')$.
\begin{claim}
For every fixed $n$, $Y_n$ and $Z_n$ are independent.
\end{claim}
\begin{subproof}
This is obvious, but we'll prove it carefully here.
\begin{IEEEeqnarray*}{rCl}
&&(\bP \otimes \bP) [Y_n \in (a,b) , Z_n \in (a',b') ]\\
&=& (\bP\otimes\bP) \left( (\omega, \omega') : X_n(\omega) \in (a,b) \land X_n(\omega') \in (a',b') \right)\\
&=& (\bP\otimes\bP)(A \times A'), \text{ where }
A \coloneqq X_n^{-1}\left( (a,b)\right) \text{ and } A' \coloneqq X_n^{-1}\left( (a',b') \right)\\
&=& \bP(A)\bP(A')
\end{IEEEeqnarray*}
\end{subproof}
Now $\bE[Y_n - Z_n] = 0$ (by definition) and $\Var(Y_n - Z_n) = 2\Var(X_n)$.
Obviously, $(Y_n - Z_n)_{n \ge 1}$ is also uniformly bounded.
\begin{claim}
$\sum_{n \ge 1} (Y_n - Z_n) < \infty$ almost surely
on $(\Omega \otimes \Omega, \cF \otimes\cF, \bP \otimes\bP)$.
\end{claim}
\begin{subproof}
Suppose $\Omega_0 = \{\omega: \sum_{n \ge 1} X_n(\omega) < \infty\}$.
Then $\bP(\Omega_0) = 1$.
Thus $(\bP\otimes\bP)(\Omega_0 \times \Omega_0) = 1$.
Furthermore
$\sum_{n \ge 1} \left(Y_n(\omega, \omega') - Z_n(\omega, \omega') \right)= \sum_{n \ge 1} \left(X_n(\omega) - X_n(\omega')\right)$.
Thus $\sum_{n \ge 1} \left( Y_n(\omega, \omega') - Z_n(\omega, \omega') \right) < \infty$ a.s.~on $\Omega_0 \times \Omega_0$.
\end{subproof}
By \autoref{thm5}, $\sum_{n} \Var(X_n) = \frac{1}{2}\sum_{n \ge 1} \Var(Y_n - Z_n) < \infty$.
Define $U_n \coloneqq X_n - \bE(X_n)$.
Then $\bE(U_n) = 0$ and the $U_n$ are independent
and uniformly bounded.
We have $\sum_{n} \Var(U_n) = \sum_{n} \Var(X_n) < \infty$.
Thus $\sum_{n} U_n$ converges a.s.~by \autoref{thm2}.
Since by assumption $\sum_{n} X_n < \infty$ a.s.,
it follows that $\sum_{n} \bE(X_n) < \infty$.
\end{refproof}
\begin{remark}
In the proof of \autoref{thm4}
``$\impliedby$'' is just a trivial application of \autoref{thm2}
and uniform boundedness was not used.
The idea of `` $\implies$ '' will lead to coupling. % TODO ?
\end{remark}
% TODO Proof of thm5 in the notes
\begin{example}[Application of \autoref{thm5}]
The series $\sum_{n} \frac{1}{n^{\frac{1}{2} + \epsilon}}$
does not converge for $\epsilon < \frac{1}{2}$.
However
\[
\sum_{n} X_n \frac{1}{n^{\frac{1}{2} + \epsilon}}
\]
where $\bP[X_n = 1] = \bP[X_n = -1] = \frac{1}{2}$
converges almost surely for all $\epsilon > 0$.
And
\[
\sum_{n} X_n \frac{1}{n^{\frac{1}{2} - \epsilon}}
\]
does not converge.
\end{example}
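A short justification of the last two claims (my own reasoning, for $\epsilon \in (0, \frac{1}{2})$): the summands $X_n n^{-(\frac{1}{2} + \epsilon)}$ are independent, uniformly bounded and have mean $0$, and
\[
\sum_{n \ge 1} \Var\left( X_n n^{-(\frac{1}{2} + \epsilon)} \right) = \sum_{n \ge 1} \frac{1}{n^{1 + 2 \epsilon}} < \infty,
\]
so the series converges almost surely by \autoref{thm2} (or \autoref{thm4}).
For the exponent $\frac{1}{2} - \epsilon$ the variances sum to $\sum_{n \ge 1} n^{-(1 - 2\epsilon)} = \infty$, so by \autoref{thm5} the series cannot converge almost surely.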

150
inputs/lecture_8.tex Normal file
View file

@ -0,0 +1,150 @@
% Lecture 8 2023-05-02
\subsection{Kolmogorov's 0-1-law}
Some classes of events always have probability $0$ or $1$.
One example of such a 0-1-law is the Borel-Cantelli lemma
and its converse.
We now want to look at events that capture certain aspects of long term behaviour
of sequences of random variables.
\begin{definition}
Let $X_n, n \in \N$ be a sequence of random variables
on a probability space $(\Omega, \cF, \bP)$.
Let $\cT_i \coloneqq \sigma(X_i, X_{i+1}, \ldots )$
be the $\sigma$-algebra generated by $X_i, X_{i+1}, \ldots$.
Then the \vocab{tail-$\sigma$-algebra} is defined as
\[
\cT \coloneqq \bigcap_{i \in \N} \cT_i.
\]
The events $A \in \cT \subseteq \cF$ are called \vocab[Tail event]{tail events}.
\end{definition}
\begin{remark}
\begin{enumerate}[(i)]
\item Since intersections of arbitrarily many $\sigma$-algebras
is again a $\sigma$-algebra, $\cT$ is indeed a $\sigma$-algebra.
\item We have
\[
\cT = \{A \in \cF ~|~ \forall i \in \N ~ \exists B \in \cB(\R)^{\otimes \N} : A = \{\omega ~|~ (X_i(\omega), X_{i+1}(\omega), \ldots) \in B\} \}.
\]
\end{enumerate}
\end{remark}
\begin{example}[What are tail events?]
Let $X_n, n \in \N$ be a sequence of independent random variables on a probability
space $(\Omega, \cF, \bP)$. Then
\begin{enumerate}[(i)]
\item $\left\{\omega | \sum_{n \in \N} X_n(\omega) \text{ converges} \right\}$ is a tail event,
since for all $\omega \in \Omega$ we have
\begin{IEEEeqnarray*}{rCl}
&& \sum_{i=1}^\infty X_i(\omega) \text{ converges}\\
&\iff& \sum_{i=2}^\infty X_i(\omega) \text{ converges}\\
&\iff& \ldots \\
&\iff& \sum_{i=k}^\infty X_i(\omega) \text{ converges}.\\
\end{IEEEeqnarray*}
(Since the $X_i$ are independent, the convergence
of $\sum_{n \in \N} X_n$ is not influenced by $X_1,\ldots, X_k$
for any $k$.)
\item $\left\{\omega | \sum_{n \in \N} X_n(\omega) = c\right\} $
for some $c \in \R$
is not a tail event,
because $\sum_{n \in \N} X_n$ depends on $X_1$.
\item $\{\omega | \lim_{n \to \infty} \frac{1}{n} \sum_{i=1}^{n} X_i(\omega) = c\}$
is a tail event, since
\[
c = \lim_{n \to \infty} \frac{1}{n} \sum_{i=1}^{n} X_i = \underbrace{\lim_{n \to \infty} \frac{1}{n} X_1}_{= 0} + \lim_{n \to \infty} \frac{1}{n} \sum_{i=2}^n X_i = \ldots = \lim_{n \to \infty} \frac{1}{n} \sum_{i=k}^n X_i.
\]
\end{enumerate}
\end{example}
So $\cT$ includes all long term behaviour of $X_n, n \in \N$,
which does not depend on the realisation of the first $k$ random variables
for any $k \in \N$.
\begin{theorem}[Kolmogorov's 0-1 law]
\label{kolmogorov01}
Let $X_n, n \in \N$ be a sequence of independent random variables
and let $\cT$ denote their tail-$\sigma$-algebra.
Then $\cT$ is \vocab{$\bP$-trivial}, i.e.~$\bP[A] \in \{0,1\}$
for all $A \in \cT$.
\end{theorem}
\begin{idea}
The idea behind proving that $\cT$ is $\bP$-trivial is to show that
for any $A, B \in \cT$ we have
\[
\bP[A \cap B] = \bP[A] \cdot \bP[B].
\]
Taking $A = B$, it follows that $\bP[A] = \bP[A]^2$, hence $\bP[A] \in \{0,1\}$.
\end{idea}
\begin{refproof}{kolmogorov01}
Let $\cF_n \coloneqq \sigma(X_1,\ldots,X_n)$
and remember that $\cT_{n} = \sigma(X_{n}, X_{n+1},\ldots)$.
The proof rests on two claims:
\begin{claim}
For all $n \ge 1$, $A \in \cF_n$ and $B \in \cT_{n+1}$
we have $\bP[A \cap B] = \bP[A]\bP[B]$.
\end{claim}
\begin{subproof}
This follows from the independence of the $X_i$.
It is
\[
\sigma\left( X_1,\ldots,X_n \right) = \sigma\left(\underbrace{\left\{X_{1}^{-1}(B_1) \cap \ldots \cap X_n^{-1}(B_n) ~|~ B_1,\ldots,B_n \in \cB(\R)\right\}}_{\text{\reflectbox{$\coloneqq$}}\cA} \right).
\]
$\cA$ is a semi-algebra, since
\begin{enumerate}[(i)]
\item $\emptyset, \Omega \in \cA$,
\item $A, B \in \cA \implies A \cap B \in \cA$,
\item for $A \in \cA$, $A^c = \bigsqcup_{i=1}^n A_i$
for disjoint sets $A_1,\ldots,A_n \in \cA$.
\end{enumerate}
Hence it suffices to show the claim for sets $A \in \cA$.
Similarly
\[
\cT_{n+1} = \sigma \left( \underbrace{ \{X_{n+1}^{-1}(M_1) \cap \ldots \cap X_{n+k}^{-1}(M_k) ~|~ k \in \N, M_1,\ldots, M_k \in \cB(\R)\}}_{\text{\reflectbox{$\coloneqq$}} \cB} \right).
\]
Again, $\cB$ is closed under intersection.
So let $A \in \cA$ and $B \in \cB$.
Then
\[
\bP[A \cap B] = \bP[A] \cdot \bP[B]
\]
by the independence of $\{X_1,\ldots,X_{n+k}\}$,
and since $A$ only depends on $\{X_1,\ldots,X_n\}$
and $B$ only on $\{X_{n+1},\ldots, X_{n+k}\}$.
\end{subproof}
\begin{claim}
$\bigcup_{n \in \N} \cF_n$ is an algebra
and
\[
\sigma\left( \bigcup_{n \in \N} \cF_n \right) = \sigma(X_1,X_2,\ldots) = \cT_1.
\]
\end{claim}
\begin{subproof}
``$\supseteq$ '' If $A_n \in \sigma(X_n)$, then $A_n \in \cF_n$.
Hence $A_n \in \bigcup_{n \in \N} \cF_n$.
Since $\sigma(X_1,X_2,\ldots)$ is generated by $\{A_n \in \sigma(X_n) : n \in \N\}$,
this also means $\sigma(X_1,X_2,\ldots) \subseteq\sigma\left( \bigcup_{n \in \N} \cF_n \right)$.
``$\subseteq$ '' Since $\cF_n = \sigma(X_1,\ldots,X_n)$,
obviously $\cF_n \subseteq \sigma(X_1,X_2,\ldots)$
for all $n$.
It follows that $\bigcup_{n \in \N} \cF_n \subseteq \sigma(X_1,X_2,\ldots)$.
Hence $\sigma\left( \bigcup_{n \in \N} \cF_n \right) \subseteq\sigma(X_1,X_2,\ldots)$.
\end{subproof}
Now let $T \in \cT$.
Then $T \in \cT_{n+1}$ for any $n$.
Hence $\bP[A \cap T] = \bP[A] \bP[T]$
for all $A \in \cF_n$ by the first claim.
It follows that the same holds for all $A \in \bigcup_{n \in \N} \cF_n$,
hence for all $A \in \sigma\left( \bigcup_{n \in \N} \cF_n \right)$,
and by the second claim for all $A \in \sigma(X_1,X_2,\ldots) = \cT_1$.
But since $T \in \cT$, in particular $T \in \cT_1$,
so by choosing $A = T$, we get
\[
\bP[T] = \bP[T \cap T] = \bP[T]^2
\]
hence $\bP[T] \in \{0,1\}$.
\end{refproof}

146
inputs/lecture_9.tex Normal file
View file

@ -0,0 +1,146 @@
\subsubsection{Application: Percolation}
We will now discuss another application of Kolmogorov's $0-1$-law, percolation.
\begin{definition}[\vocab{Percolation}]
Consider the graph with nodes $\Z^d$, $d \ge 2$, where each edge of the lattice is added independently with probability $p$. The added edges are called \vocab[Percolation!Edge!open]{open};
all other edges are called
\vocab[Percolation!Edge!closed]{closed}.
More formally, we consider
\begin{itemize}
\item $\Omega = \{0,1\}^{\bE_d}$, where $\bE_d$ are all edges in $\Z^d$,
\item $\cF \coloneqq \text{product $\sigma$-algebra}$,
\item $\bP \coloneqq \left(p \underbrace{\delta_{\{1\} }}_{\text{edge is open}} + (1-p) \underbrace{\delta_{\{0\} }}_{\text{edge is closed}}\right)^{\otimes \bE_d}$.
\end{itemize}
\end{definition}
\begin{question}
Starting at the origin, what is the probability that there exists
an infinite path (without moving backwards)?
\end{question}
\begin{definition}
An \vocab{infinite path} consists of an infinite sequence of distinct points
$x_0, x_1, \ldots$
such that $x_n$ is connected to $x_{n+1}$, i.e.~the edge $\{x_n, x_{n+1}\}$ is open.
\end{definition}
Let $C_\infty \coloneqq \{\omega | \text{an infinite path exists}\}$.
\begin{exercise}
Show that changing the presence / absence of finitely many edges
does not change the existence of an infinite path.
Therefore $C_\infty$ is an element of the tail $\sigma$-algebra.
Hence $\bP(C_\infty) \in \{0,1\}$.
\end{exercise}
Obviously, $\bP(C_\infty)$ is monotonic with respect to $p$.
For $d = 2$ it is known that $p = \frac{1}{2}$ is the critical value.
For $d > 2$ this is unknown.
% TODO: more in the notes
We'll get back to percolation later.
\section{Characteristic functions, weak convergence and the central limit theorem}
The characteristic function of a probability measure is also known as its \vocab{Fourier transform}.
Weak convergence is also known as \vocab{convergence in distribution} / \vocab{convergence in law}.
We will abbreviate the central limit theorem by \vocab{CLT}.
So far we have dealt with the average behaviour,
\[
\frac{\overbrace{X_1 + \ldots + X_n}^{\text{i.i.d.}}}{n} \to \bE(X_1).
\]
We now want to understand \vocab{fluctuations} from the average behaviour,
i.e.\[
X_1 + \ldots + X_n - n \cdot \bE(X_1).
\]
% TODO improve
The question is, what happens on other timescales than $n$?
An example is
\[
\frac{X_1 + \ldots + X_n - n \bE(X_1)}{\sqrt{n} } \xrightarrow{n \to \infty} \cN(0, \Var(X_1)) \qquad (\ast)
\]
Why is $\sqrt{n}$ the right order? (Handwavey argument)
Suppose $X_1, X_2,\ldots$ are i.i.d. $\cN(0,1)$.
The mean of the l.h.s.~is $0$ and for the variance we get
\[
\Var(\frac{X_1 + \ldots + X_n - n \bE(X_1)}{\sqrt{n} }) = \Var\left( \frac{X_1+ \ldots + X_n}{\sqrt{n} } \right) = \frac{1}{n} \left( \Var(X_1) + \ldots + \Var(X_n) \right) = 1
\]
For the r.h.s.~we get a mean of $0$ and a variance of $1$.
So, for $(\ast)$ to have a chance of making sense, $\sqrt{n}$
is the right scaling.
To define $(\ast)$ we need another notion of convergence.
This will be the weakest notion of convergence, hence it is called
\vocab{weak convergence}.
This notion of convergence will be defined in terms of characteristic functions of Fourier transforms.
\subsection{Characteristic functions and Fourier transform}
Consider $(\R, \cB(\R), \bP)$.
For every $t \in \R$ define a function $\phi(t) \coloneqq \phi_\bP(t) \coloneqq \int_{\R} e^{\i t x} \bP(dx)$.
We have
\[
\phi(t) = \int_{\R} \cos(tx) \bP(dx) + \i \int_{\R} \sin(tx) \bP(dx).
\]
\begin{itemize}
\item Since $|e^{\i t x}| \le 1$ the function $\phi(\cdot )$ is always defined.
\item We have $\phi(0) = 1$.
\item $|\phi(t)| \le \int_{\R} |e^{\i t x} | \bP(dx) = 1$.
\end{itemize}
We call $\phi_{\bP}$ the \vocab{characteristic function} of $\bP$.
\begin{remark}
Suppose $(\Omega, \cF, \bP)$ is an arbitrary probability space and
$X: (\Omega, \cF) \to (\R, \cB(\R))$ is a random variable.
Then we can define
\[
\phi_X(t) \coloneqq \bE[e^{\i t X}] = \int e^{\i t X(\omega)} \bP(d \omega) = \int_{\R} e^{\i t x} \mu(dx) = \phi_\mu(t)
\]
where $\mu = \bP X^{-1}$.
\end{remark}
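A standard example (quoted here without proof; it is not derived at this point of the lecture): for the standard Gaussian distribution with density $\frac{1}{\sqrt{2\pi}} e^{-\frac{x^2}{2}}$,
\[
\phi(t) = \frac{1}{\sqrt{2\pi}} \int_{\R} e^{\i t x} e^{-\frac{x^2}{2}} dx = e^{-\frac{t^2}{2}}.
\]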
\begin{theorem}[Inversion formula] % thm1
\label{inversionformula}
Let $(\R, \cB(\R), \bP)$ be a probability space.
Let $F$ be the distribution function of $\bP$
(i.e.~$F(x) = \bP((-\infty, x])$ for all $x \in \R$ ).
Then for every $a < b$ we have
\begin{eqnarray}
\frac{F(b) + F(b-)}{2} - \frac{F(a) + F(a-)}{2} = \lim_{T \to \infty} \frac{1}{2 \pi} \int_{-T}^T \frac{e^{-\i t b} - e^{- \i t a}}{- \i t} \phi(t) dt
\label{invf}
\end{eqnarray}
where $F(b-)$ is the left limit.
\end{theorem}
% TODO!
We will prove this later.
\begin{theorem}[Uniqueness theorem] % thm2
\label{charfuncuniqueness}
Let $\bP$ and $\Q$ be two probability measures on $(\R, \cB(\R))$.
Then $\phi_\bP = \phi_\Q \implies \bP = \Q$.
Therefore, probability measures are uniquely determined by their characteristic functions.
Moreover, \eqref{invf} gives a representation of $\bP$ (via $F$)
from $\phi$.
\end{theorem}
\begin{refproof}{charfuncuniqueness}
Assume that we have already shown \autoref{inversionformula}.
Suppose that $F$ and $G$ are the distribution functions of $\bP$ and $\Q$.
Let $a,b \in \R$ with $a < b$.
Assume that $a $ and $b$ are continuity points of both $F$ and $G$.
By \autoref{inversionformula} we have
\begin{IEEEeqnarray*}{rCl}
F(b) - F(a) = G(b) - G(a) \label{eq:charfuncuniquefg}
\end{IEEEeqnarray*}
Since $F$ and $G$ are monotonic, \autoref{eq:charfuncuniquefg}
holds for all $a < b$ outside a countable set.
Take $a_n$ outside this countable set, such that $a_n \ssearrow -\infty$.
Then \autoref{eq:charfuncuniquefg} implies that
$F(b) - F(a_n) = G(b) - G(a_n)$; letting $n \to \infty$ gives $F(a_n) \to 0$ and $G(a_n) \to 0$, hence $F(b) = G(b)$.
Since $F$ and $G$ are right-continuous, it follows that $F = G$.
\end{refproof}

View file

@ -1,3 +1,142 @@
% This section provides a short recap of things that should be known
% from the lecture on stochastics.
\subsection{Notions of convergence}
\begin{definition}
Fix a probability space $(\Omega,\cF,\bP)$.
Let $X, X_1, X_2,\ldots$ be random variables.
\begin{itemize}
\item We say that $X_n$ converges to $X$
\vocab[Convergence!almost surely]{almost surely}
($X_n \xrightarrow{a.s.} X$)
iff
\[
\bP(\{\omega | X_n(\omega) \to X(\omega)\}) = 1.
\]
\item We say that $X_n$ converges to $X$
\vocab[Convergence!in probability]{in probability}
($X_n \xrightarrow{\bP} X$)
iff
\[
\lim_{n \to \infty}\bP[|X_n - X| > \epsilon] = 0
\]
for all $\epsilon > 0$.
\item We say that $X_n$ converges to $X$
\vocab[Convergence!in mean]{in the $p$-th mean}
($X_n \xrightarrow{L^p} X$ )
iff
\[
\bE[|X_n - X|^p] \xrightarrow{n \to \infty} 0.
\]
\end{itemize}
\end{definition}
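A trivial example to fix the definitions (my own): for the deterministic sequence $X_n \equiv \frac{1}{n}$ and $X \equiv 0$ all three notions of convergence hold, since
\[
X_n(\omega) = \frac{1}{n} \to 0 \text{ for every } \omega, \qquad
\bP[|X_n - X| > \epsilon] = 0 \text{ for } n \ge \frac{1}{\epsilon}, \qquad
\bE[|X_n - X|^p] = n^{-p} \to 0.
\]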
% TODO Connect to ANaIII
\begin{theorem}
\vspace{10pt}
Let $X$ be a random variable and $X_n, n \in \N$ a sequence of random variables.
Then
\begin{figure}[H]
\centering
\begin{tikzpicture}
\node at (0,1.5) (as) { $X_n \xrightarrow{a.s.} X$};
\node at (1.5,0) (p) { $X_n \xrightarrow{\bP} X$};
\node at (3,1.5) (L1) { $X_n \xrightarrow{L^1} X$};
\draw[double equal sign distance, -implies] (as) -- (p);
\draw[double equal sign distance, -implies] (L1) -- (p);
\end{tikzpicture}
\end{figure}
and none of the other implications hold.
\end{theorem}
\begin{proof}
\begin{claim}
$X_n \xrightarrow{a.s.} X \implies X_n \xrightarrow{\bP} X$.
\end{claim}
\begin{subproof}
Let $\Omega_0 \coloneqq \{\omega \in \Omega : \lim_{n\to \infty} X_n(\omega) = X(\omega)\} $.
Let $\epsilon > 0$ and consider $A_n \coloneqq \bigcup_{m \ge n} \{\omega \in \Omega: |X_m(\omega) - X(\omega)| > \epsilon\}$.
Then $A_n \supseteq A_{n+1} \supseteq \ldots$
Define $A \coloneqq \bigcap_{n \in \N} A_n$.
Then $\bP[A_n] \xrightarrow{n\to \infty} \bP[A]$.
Since $X_n \xrightarrow{a.s.} X$ we have that
$\forall \omega \in \Omega_0 \exists n \in \N \forall m \ge n |X_m(\omega) - X(\omega)| < \epsilon$.
We have $A \subseteq \Omega_0^{c}$, hence $\bP[A] = 0$ and thus $\bP[A_n] \to 0$.
Thus \[
\bP[\{\omega \in \Omega | ~|X_n(\omega) - X(\omega)| > \epsilon\}] \le \bP[A_n] \to 0.
\]
\end{subproof}
\begin{claim}
$X_n \xrightarrow{L^1} X \implies X_n\xrightarrow{\bP} X$
\end{claim}
\begin{subproof}
We have $\bE[|X_n - X|] \to 0$.
Suppose there exists an $\epsilon > 0$ such that
$\lim_{n \to \infty} \bP[|X_n - X| > \epsilon] = c > 0$.
We have
\begin{IEEEeqnarray*}{rCl}
\bE[|X_n - X|] &=& \int_\Omega |X_n - X | d\bP\\
&=& \int_{|X_n - X| > \epsilon} |X_n - X| d\bP + \underbrace{\int_{|X_n - X| \le \epsilon} |X_n - X | d\bP}_{\ge 0}\\
&\ge& \epsilon \int_{|X_n -X | > \epsilon} d\bP\\
&=& \epsilon \cdot \bP[|X_n - X| > \epsilon] \xrightarrow{n \to \infty} \epsilon \cdot c > 0 \lightning
\end{IEEEeqnarray*}
\todo{Improve this with Markov}
\end{subproof}
\begin{claim}
$X_n \xrightarrow{\bP} X \notimplies X_n\xrightarrow{L^1} X$
\end{claim}
\begin{subproof}
Take $([0,1], \cB([0,1]), \lambda)$
and define $X_n \coloneqq n \One_{[0, \frac{1}{n}]}$.
We have $\bP[|X_n| > \epsilon] = \frac{1}{n}$
for $n$ large enough.
However $\bE[|X_n|] = 1$.
\end{subproof}
\begin{claim}
$X_n \xrightarrow{a.s.} X \notimplies X_n\xrightarrow{L^1} X$.
\end{claim}
\begin{subproof}
We can use the same counterexample as in the previous claim,
$X_n = n \One_{[0, \frac{1}{n}]}$:
for every $\omega \in (0,1]$ we have $X_n(\omega) = 0$ as soon as $n > \frac{1}{\omega}$, hence $X_n \xrightarrow{a.s.} 0$.
We have already seen that $X_n$ does not converge in $L^1$.
\end{subproof}
\begin{claim}
$X_n \xrightarrow{L^1} X \notimplies X_n\xrightarrow{a.s.} X$.
\end{claim}
\begin{subproof}
Take $\Omega = [0,1], \cF = \cB([0,1]), \bP = \lambda$.
Define $A_n \coloneqq [j 2^{-k}, (j+1) 2^{-k}]$ and $X_n \coloneqq \One_{A_n}$, where $n = 2^k + j$ with $0 \le j < 2^k$.
We have
\[
\bE[|X_n|] = \int_{\Omega}|X_n| d\bP = \frac{1}{2^k} \to 0.
\]
However $X_n$ does not converge a.s.~as for all $\omega \in [0,1]$
the sequence $X_n(\omega)$ takes the values $0$ and $1$ infinitely often.
\end{subproof}
\end{proof}
How do we prove that something happens almost surely?
The first thing that should come to mind is:
\begin{lemma}[Borel-Cantelli]
If we have a sequence of events $(A_n)_{n \ge 1}$
such that $\sum_{n \ge 1} \bP(A_n) < \infty$,
then $\bP[ A_n \text{ for infinitely many $n$}] = 0$
(more precisely: $\bP[\limsup_{n \to \infty} A_n] = 0$).
For independent events $A_n$ the converse holds as well.
\end{lemma}
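A typical application (a standard example, not from the script): if $\bP(A_n) = \frac{1}{n^2}$, then
\[
\sum_{n \ge 1} \bP(A_n) < \infty \implies \bP[\limsup_{n \to \infty} A_n] = 0,
\]
whereas for independent events with $\bP(A_n) = \frac{1}{n}$ the sum diverges, so $\bP[\limsup_{n \to \infty} A_n] = 1$ by the converse direction.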
\iffalse
\todo{Add more stuff here}
\subsection{Some inequalities}
% TODO: Markov
\begin{theorem}[Chebyshev's inequality] % TODO Proof
Let $X$ be a r.v.~with $\Var(x) < \infty$.
Then $\forall \epsilon > 0 : \bP \left[ \left| X - \bE[X] \right| > \epsilon\right] \le \frac{\Var(x)}{\epsilon^2}$.
@ -8,16 +147,6 @@
We used Chebyshev's inequality. Linearity of $\bE$, $\Var(cX) = c^2\Var(X)$ and $\Var(X_1 +\ldots + X_n) = \Var(X_1) + \ldots + \Var(X_n)$ for independent $X_i$.
How do we prove that something happens almost surely?
\begin{lemma}[Borel-Cantelli]
If we have a sequence of events $(A_n)_{n \ge 1}$
such that $\sum_{n \ge 1} \bP(A_n) < \infty$,
then $\bP[ A_n \text{for infinitely many $n$}] = 0$
(more precisely: $\bP[\limsup_{n \to \infty} A_n] = 0$).
The converse also holds for independent events $A_n$.
\end{lemma}
Modes of covergence: $L^p$, in probability, a.s.
\fi

View file

@ -1,91 +0,0 @@
% TODO \begin{goal}
% TODO We want to drop our assumptions on finite mean or variance
% TODO and say something about the behaviour of $ \sum_{n \ge 1} X_n$
% TODO when the $X_n$ are independent.
% TODO \end{goal}
\begin{theorem}[Theorem 3, Kolmogorov's three-series theorem] % Theorem 3
\label{thm3}
Let $X_n$ be a family of independent random variables.
\begin{enumerate}[(a)]
\item Suppose for some $C \ge 0$, the following three series
of numbers converge:
\begin{itemize}
\item $\sum_{n \ge 1} \bP(|X_n| > C)$,
\item $\sum_{n \ge 1} \underbrace{\int_{|X_n| \le C} X_n d\bP}_{\text{\vocab{truncated mean}}}$,
\item $\sum_{n \ge 1} \underbrace{\int_{|X_n| \le C} X_n^2 d\bP - \left( \int_{|X_n| \le C} X_n d\bP \right)^2}_{\text{\vocab{truncated variance} }}$.
\end{itemize}
Then $\sum_{n \ge 1} X_n$ converges almost surely.
\item Suppose $\sum_{n \ge 1} X_n$ converges almost surely.
Then all three series above converge for every $C > 0$.
\end{enumerate}
\end{theorem}
For the proof we'll need a slight generalization of \autoref{thm2}:
\begin{theorem}[Theorem 4] % Theorem 4
\label{thm4}
Let $\{X_n\}_n$ be independent and \vocab{uniformly bounded}
(i.e. $\exists M < \infty : \sup_n \sup_\omega |X_n(\omega)| \le M$).
Then $\sum_{n \ge 1} X_n$ converges almost surely
$\iff$ $\sum_{n \ge 1} \bE(X_n)$ and $\sum_{n \ge 1} \Var(X_n)$
converge.
\end{theorem}
\begin{refproof}{thm3}
Assume, that we have already proved \autoref{thm4}.
We prove part (a) first.
Put $Y_n = X_n \cdot \One_{\{|X_n| \le C\}}$.
Since the $X_n$ are independent, the $Y_n$ are independent as well.
Furthermore, the $Y_n$ are uniformly bounded.
By our assumption, the series
$\sum_{n \ge 1} \int_{|X_n| \le C} X_n d\bP = \sum_{n \ge 1} \bE[Y_n]$
and $\sum_{n \ge 1} \int_{|X_n| \le C} X_n^2 d\bP - \left( \int_{|X_n| \le C} X_n d\bP \right)^2 = \sum_{n \ge 1} \Var(Y_n)$
converges.
By \autoref{thm4} it follows that $\sum_{n \ge 1} Y_n < \infty$
almost surely.
Let $A_n \coloneqq \{\omega : |X_n(\omega)| > C\}$.
Since the first series $\sum_{n \ge 1} \bP(A_n) < \infty$,
by Borel-Cantelli, $\bP[\text{infinitely many $A_n$ occcur}] = 0$.
For the proof of (b), suppose $\sum_{n\ge 1} X_n(\omega) < \infty$
for almost every $\omega$.
Fix an arbitrary $C > 0$.
Define
\[
Y_n(\omega) \coloneqq \begin{cases}
X_n(\omega) & \text{if} |X_n(\omega)| \le C,\\
C &\text{if } |X_n(\omega)| > C.
\end{cases}
\]
Then the $Y_n$ are independent and $\sum_{n \ge 1} Y_n(\omega) < \infty$
almost surely and the $Y_n$ are uniformly bounded.
By \autoref{thm4} $\sum_{n \ge 1} \bE[Y_n]$ and $\sum_{n \ge 1} \Var(Y_n)$
converge.
Define
\[
Z_n(\omega) \coloneqq \begin{cases}
X_n(\omega) &\text{if } |X_n| \le C,\\
-C &\text{if } |X_n| > C.
\end{cases}
\]
Then the $Z_n$ are independent, uniformly bounded and $\sum_{n \ge 1} Z_n(\omega) < \infty$
almost surely.
By \autoref{thm4} we have
$\sums_{n \ge 1} \bE(Z_n) < \infty$
and $\sums_{n \ge 1} \Var(Z_n) < \infty$.
We have
\[
\bE(Y_n) &=& \int_{|X_n| \le C} X_n d \bP + C \bP(|X_n| \ge C)\\
\bE(Z_n) &=& \int_{|X_n| \le C} X_n d \bP - C \bP(|X_n| \ge C)\\
\]
Since $\bE(Y_n) + \bE(Z_n) = 2 \int_{|X_n| \le C} X_n d\bP$
the second series converges,
and since
$\bE(Y_n) - \bE(Z_n)$ converges, the first series converges.
For the third series, we look at
$\sum_{n \ge 1} \Var(Y_n)$ and
$\sum_{n \ge 1} \Var(Z_n)$ to conclude that this series converges
as well.
\end{refproof}

View file

@ -1,18 +1,47 @@
\documentclass[10pt,ngerman,a4paper, fancyfoot, git]{mkessler-script}
\documentclass[10pt,a4paper, fancyfoot, git, english]{mkessler-script}
\course{Probability Theory}
\lecturer{}
\author{}
\lecturer{Prof.~Chiranjib Mukherjee}
\author{Josia Pietsch}
\usepackage{wtheo}
\begin{document}
\maketitle
%\frontmatter
\cleardoublepage
\tableofcontents
\cleardoublepage
%\mainmatter
\input{inputs/intro.tex}
\section*{Prerequisites}
\input{inputs/lecture_1.tex}
\input{inputs/prerequisites.tex}
\input{inputs/lecture_2.tex}
\input{inputs/lecture_3.tex}
\input{inputs/lecture_5.tex}
\input{inputs/lecture_6.tex}
\input{inputs/lecture_7.tex}
\input{inputs/lecture_8.tex}
\input{inputs/lecture_9.tex}
\input{inputs/lecture_10.tex}
\cleardoublepage
%\backmatter
%\chapter{Appendix}
\cleardoublepage
\printvocabindex
\end{document}

View file

@ -1 +1,95 @@
\ProvidesPackage{wtheo}[2022/02/10 - Style file for notes of Probability Theory]
\usepackage[english]{babel}
\usepackage[cache, number in = section]{fancythm}
\usepackage{mkessler-mathfont}
\usepackage{centernot}
\usepackage{enumerate}
\usepackage{mkessler-todo}
\usepackage[index]{mkessler-vocab}
\usepackage{mkessler-code}
\usepackage{jrpie-math}
\usepackage[normalem]{ulem}
\usepackage{pdflscape}
\usepackage{longtable}
\usepackage{xcolor}
\usepackage{dsfont}
\usepackage{csquotes}
\usepackage{tikz}
\usepackage{tikz-cd}
\usetikzlibrary{arrows}
%\usepackage{wrapfig}
\usepackage{listings}
\usepackage{multirow}
\usepackage{float}
%\usepackage{algorithmicx}
\newcounter{subsubsubsection}[subsubsection]
\renewcommand\thesubsubsubsection{\thesubsubsection.\arabic{subsubsubsection}}
\newcommand\subsubsubsection[1]
{
\stepcounter{subsubsubsection}
\medskip
\textbf{\thesubsubsubsection~#1}
\medskip
}
\newcommand\todoimg[1]
{
\todo{MISSING IMAGE: #1}
}
\usepackage{siunitx}
% If exam relevance was EXPLICITLY pointed out
\newcommand\klausurrelevant{
\footnote{\color{red}klausurrelevant!}
}
\usepackage{acro}
\def\alert#1{{\color{red} #1}}
\usepackage{imakeidx}
\makeindex[name = ccode, title = \texttt{C} functions and macros]
\usepackage{hyperref}
\usepackage[quotation]{knowledge}[22/02/12]
\newcommand\main[1]{\underline{#1}}
\newcommand\usage[1]{\textit{#1}}
\renewcommand\i{\mathrm{\mathbf{i}}}
\newcommand\notimplies{\centernot\implies}
\knowledgestyle{ccode}{color=purple!30!black, index style = usage, wrap = \code}
\knowledgestyle{ccode unknown}{ wrap = \code, color = brown}
\knowledgestyle{ccode unknown cont}{ wrap = \code}
\knowledgestyle{ccode intro}{color=blue, boldface, index style = main, wrap = \code}
\knowledgestyle{autoref link}{autoref link}
\knowledgestyle{autoref target}{autoref target}
\knowledgenewvariant\cc{
default style = {autoref link, ccode},
unknown style = {ccode unknown},
unknown style cont = {ccode unknown cont},
% unknown warning = false,
% unknown diagnose = false,
}
\knowledgenewvariant\ccintro {
auto knowledge = {autoref, scope=document, also now, index, index name = ccode, wrap = \code},
default style = {autoref target, ccode intro},
unknown style = ccode unknown,
unknown style cont = ccode unknown
}
\knowledgevariantmodifier{\intro*\cc}\ccintro
\knowledgevariantmodifier{\cintro*\cc}\ccintro
\hypersetup{colorlinks, citecolor=violet, urlcolor=blue!80!black, linkcolor=red!50!black, pdfauthor=\@author, pdftitle=\ifdef{\@course}{\@course}{\@title}}
\NewFancyTheorem[thmtools = { style = thmredmargin} , group = { big } ]{warning}
\DeclareSimpleMathOperator{Var}