diff --git a/inputs/a_1_counterexamples.tex b/inputs/a_1_counterexamples.tex index 1e2658c..1660476 100644 --- a/inputs/a_1_counterexamples.tex +++ b/inputs/a_1_counterexamples.tex @@ -1,2 +1,6 @@ \section{Counterexamples} +Exercise 4.3 +10.2 + + diff --git a/inputs/intro.tex b/inputs/intro.tex index 038b59b..e2dc4ed 100644 --- a/inputs/intro.tex +++ b/inputs/intro.tex @@ -9,12 +9,13 @@ in the summer term 2023 at the University Münster. \end{warning} These notes contain errors almost surely. -If you find some of them or want to improve something, please send me a message: +If you find some of them or want to improve something, +please send me a message:\\ \texttt{notes\_probability\_theory@jrpie.de}. -Topics of this lecture: +\paragraph{Topics of this lecture} \begin{enumerate}[(1)] \item Limit theorems: Laws of large numbers and the central limit theorem for i.i.d.~sequences, \item Conditional expectation and conditional probabilities, diff --git a/inputs/lecture_1.tex b/inputs/lecture_1.tex index bc0ad45..53e8670 100644 --- a/inputs/lecture_1.tex +++ b/inputs/lecture_1.tex @@ -62,6 +62,7 @@ The converse to this fact is also true: \begin{proof} See theorem 2.4.3 in Stochastik. \end{proof} + \begin{example}[Some important probability distribution functions]\hfill \begin{enumerate}[(1)] \item \vocab{Uniform distribution} on $[0,1]$: diff --git a/inputs/lecture_10.tex b/inputs/lecture_10.tex index 40e082a..d7113c7 100644 --- a/inputs/lecture_10.tex +++ b/inputs/lecture_10.tex @@ -1,4 +1,4 @@ -% lecture 10 - 2023-05-09 +\lecture{10}{2023-05-09}{} % RECAP diff --git a/inputs/lecture_11.tex b/inputs/lecture_11.tex index b620f44..7fc4e23 100644 --- a/inputs/lecture_11.tex +++ b/inputs/lecture_11.tex @@ -1,4 +1,5 @@ -\subsection{The central limit theorem} +\lecture{11}{}{Intuition for the CLT} +\subsection{The Central Limit Theorem} For $X_1, X_2,\ldots$ i.i.d.~we were looking at $S_n \coloneqq \sum_{i=1}^n X_i$. diff --git a/inputs/lecture_12.tex b/inputs/lecture_12.tex index 7834d56..1eaabfc 100644 --- a/inputs/lecture_12.tex +++ b/inputs/lecture_12.tex @@ -1,4 +1,4 @@ -\lecture{12}{2023-05-16}{} +\lecture{12}{2023-05-16}{Proof of the CLT} We now want to prove \autoref{clt}. The plan is to do the following: diff --git a/inputs/lecture_13.tex b/inputs/lecture_13.tex index 2ddbf0b..85574c4 100644 --- a/inputs/lecture_13.tex +++ b/inputs/lecture_13.tex @@ -47,6 +47,18 @@ in this lecture. However, they are quite important. We will now sketch the proof of \autoref{levycontinuity}, details can be found in the notes.\notes +\begin{definition} + Let $(X_n)_n$ be a sequence of random variables. + The distribution of $(X_n)_n$ is called + \vocab[Distribution!tight]{tight} (dt. ``straff''), + if + \[ + \lim_{a \to \infty} \sup_{n \in \N} \bP[|X_n| > a] = 0. + \] +\end{definition} +\begin{example}+[Exercise 8.1] + \todo{Copy} +\end{example} A generalized version of \autoref{levycontinuity} is the following: \begin{theorem}[A generalized version of Levy's continuity \autoref{levycontinuity}] \label{genlevycontinuity} @@ -55,14 +67,14 @@ A generalized version of \autoref{levycontinuity} is the following: for some function $\phi$ on $\R$. Then the following are equivalent: \begin{enumerate}[(a)] - \item The distribution of $X_n$ is \vocab[Distribution!tight]{tight} (dt. ``straff''), - i.e.~$\lim_{a \to \infty} \sup_{n \in \N} \bP[|X_n| > a] = 0$. + \item The distribution of $X_n$ is tight. \item $X_n \xrightarrow{(d)} X$ for some real-valued random variable $X$. 
\item $\phi$ is the characteristic function of $X$. \item $\phi$ is continuous on all of $\R$. \item $\phi$ is continuous at $0$. \end{enumerate} \end{theorem} +\todo{Proof of \autoref{genlevycontinuity} (Exercise 8.2)} \begin{example} Let $Z \sim \cN(0,1)$ and $X_n \coloneqq n Z$. We have $\phi_{X_n}(t) = \bE[[e^{\i t X_n}] = e^{-\frac{1}{2} t^2 n^2} \xrightarrow{n \to \infty} \One_{\{t = 0\} }$. diff --git a/inputs/lecture_14.tex b/inputs/lecture_14.tex index d189f3e..0d8a1f0 100644 --- a/inputs/lecture_14.tex +++ b/inputs/lecture_14.tex @@ -1,6 +1,6 @@ \lecture{14}{2023-05-25}{Conditional expectation} -\section{Conditional expectation} +\section{Conditional Expectation} \subsection{Introduction} @@ -87,7 +87,7 @@ We now want to generalize this to arbitrary random variables. \] \end{definition} -\subsection{Existence of conditional probability} +\subsection{Existence of Conditional Probability} We will give two different proves of \autoref{conditionalexpectation}. The first one will use orthogonal projections. diff --git a/inputs/lecture_15.tex b/inputs/lecture_15.tex index 235948f..db70cfa 100644 --- a/inputs/lecture_15.tex +++ b/inputs/lecture_15.tex @@ -1,9 +1,9 @@ \lecture{15}{2023-06-06}{} -\subsection{Properties of conditional expectation} +\subsection{Properties of Conditional Expectation} We want to derive some properties of conditional expectation. -\begin{theorem}[Law of total expectation] % Thm 1 +\begin{theorem}[Law of total expectation] \label{ceprop1} \label{totalexpectation} \[ @@ -50,7 +50,6 @@ We want to derive some properties of conditional expectation. \begin{theorem}[Positivity] \label{ceprop4} - % 4 \label{cpositivity} If $X \ge 0$, then $\bE[X | \cG] \ge 0$ a.s. \end{theorem} @@ -66,12 +65,10 @@ We want to derive some properties of conditional expectation. \end{proof} \begin{theorem}[Conditional monotone convergence theorem] \label{ceprop5} - % 5 \label{mcmt} Let $X_n,X \in L^1(\Omega, \cF, \bP)$. Suppose $X_n \ge 0$ with $X_n \uparrow X$. Then $\bE[X_n|\cG] \uparrow \bE[X|\cG]$. - \end{theorem} \begin{proof} Let $Z_n$ be a version of $\bE[X_n | Y]$. @@ -187,12 +184,10 @@ Recall \] \end{theorem} \begin{proof} - Similar to the proof of Hölder's inequality. \todo{Exercise} \end{proof} \begin{theorem}[Tower property] - % 10 \label{ceprop10} \label{cetower} Suppose $\cF \supset \cG \supset \cH$ are sub-$\sigma$-algebras. @@ -202,11 +197,17 @@ Recall \] \end{theorem} \begin{proof} - \todo{Exercise} + By definition, $\bE[\bE[X | \cG] | \cH]$ is $\cH$-measurable. + For any $H \in \cH$, we have + \begin{IEEEeqnarray*}{rCl} + \int_H \bE[\bE[X | \cG] | \cH] \dif \bP + &=& \int_{H} \bE[X | \cG] \dif \bP\\ + &=& \int_H X \dif \bP. + \end{IEEEeqnarray*} + Hence $\bE[\bE[X | \cG] | \cH] \overset{\text{a.s.}}{=} \bE[X | \cH]$. 
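+    (For the first equality we used $H \in \cH$ and the defining property of $\bE[ \cdot | \cH]$; for the second, $H \in \cH \subseteq \cG$, so the defining property of $\bE[X | \cG]$ applies.)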
\end{proof} \begin{theorem}[Taking out what is known] - % 11 \label{ceprop11} \label{takingoutwhatisknown} diff --git a/inputs/lecture_16.tex b/inputs/lecture_16.tex index e95dd4e..6171945 100644 --- a/inputs/lecture_16.tex +++ b/inputs/lecture_16.tex @@ -32,7 +32,7 @@ \end{refproof} -\subsection{The Radon Nikodym theorem} +\subsection{The Radon Nikodym Theorem} First, let us recall some basic facts: \begin{fact} diff --git a/inputs/lecture_17.tex b/inputs/lecture_17.tex index c8ad703..97f69fc 100644 --- a/inputs/lecture_17.tex +++ b/inputs/lecture_17.tex @@ -1,6 +1,6 @@ \lecture{17}{2023-06-15}{} -\subsection{Doob's martingale convergence theorem} +\subsection{Doob's Martingale Convergence Theorem} \begin{definition}[Stochastic process] @@ -37,7 +37,7 @@ Then $(Y_n)_{n \ge 1}$ is also a (sub/super-) martingale. \end{lemma} \begin{proof} - Exercise. \todo{Copy} + Exercise. \todo{Copy Exercise 10.4} \end{proof} \begin{remark} The assumption of $K_n$ being constant can be weakened to diff --git a/inputs/lecture_18.tex b/inputs/lecture_18.tex index a0cd1ce..b505b05 100644 --- a/inputs/lecture_18.tex +++ b/inputs/lecture_18.tex @@ -13,7 +13,7 @@ Hence the same holds for submartingales, i.e. a.s.~to a finite limit, which is a.s.~finite. \end{lemma} -\subsection{Doob's $L^p$ inequality} +\subsection{Doob's $L^p$ Inequality} \begin{question} diff --git a/inputs/lecture_19.tex b/inputs/lecture_19.tex index f66d521..69e1f0a 100644 --- a/inputs/lecture_19.tex +++ b/inputs/lecture_19.tex @@ -1,6 +1,6 @@ \lecture{19}{2023-06-22}{} -\subsection{Uniform integrability} +\subsection{Uniform Integrability} \begin{example} Let $\Omega = [0,1]$, $\cF = \cB$ @@ -198,7 +198,7 @@ However, some subsets can be easily described, e.g. \] \end{proof} -\subsection{Martingale convergence theorems in $L^p, p \ge 1$} +\subsection{Martingale Convergence Theorems in \texorpdfstring{$L^p, p \ge 1$}{$Lp, p >= 1$}} Let $(\Omega, \cF, \bP)$ as always and let $(\cF_n)_n$ always be a filtration. diff --git a/inputs/lecture_2.tex b/inputs/lecture_2.tex index 1c38d36..996ea5f 100644 --- a/inputs/lecture_2.tex +++ b/inputs/lecture_2.tex @@ -1,5 +1,5 @@ \lecture{2}{}{} -\section{Independence and product measures} +\section{Independence and Product Measures} In order to define the notion of independence, we first need to construct product measures. diff --git a/inputs/lecture_20.tex b/inputs/lecture_20.tex index 9207a55..c23d6a7 100644 --- a/inputs/lecture_20.tex +++ b/inputs/lecture_20.tex @@ -66,7 +66,7 @@ Hence \[ - \|X_n - X\|_{L^p} % + \|X_n - X\|_{L^p} % \le \|X_n - X_n'\|_{L^p} + \|X_n' - X'\|_{L^p} + \|X - X'\|_{L^p} % \le 3 \epsilon. \] @@ -118,7 +118,7 @@ we need the following theorem, which we won't prove here: we get the convergence. \end{refproof} -\subsection{Stopping times} +\subsection{Stopping Times} \begin{definition}[Stopping time] A random variable $T: \Omega \to \N_0 \cup \{\infty\}$ on a filtered probability space $(\Omega, \cF, \{\cF_n\}_n, \bP)$ is called a \vocab{stopping time}, @@ -128,7 +128,6 @@ we need the following theorem, which we won't prove here: \] for all $n \in \N$. Equivalently, $\{T = n\} \in \cF_n$ for all $n \in \N$. - \end{definition} \begin{example} @@ -152,7 +151,6 @@ we need the following theorem, which we won't prove here: T \coloneqq \sup \{n \in \N : X_n \in A\} \] is not a stopping time. - \end{example} @@ -167,7 +165,7 @@ we need the following theorem, which we won't prove here: is a stopping time. 
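+    (If the stopping time here is the first entrance time $T_A \coloneqq \inf \{n \in \N : X_n \in A\}$ of a measurable set $A$, this follows from $\{T_A \le n\} = \bigcup_{k \le n} \{X_k \in A\} \in \cF_n$ for every $n \in \N$, using that $\{X_k \in A\} \in \cF_k \subseteq \cF_n$.)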
\end{example} -\begin{example} +\begin{fact} If $T_1, T_2$ are stopping times with respect to the same filtration, then \begin{itemize} @@ -176,37 +174,32 @@ we need the following theorem, which we won't prove here: \item $\max \{T_1, T_2\}$ \end{itemize} are stopping times. - +\end{fact} +\begin{warning} Note that $T_1 - T_2$ is not a stopping time. - -\end{example} +\end{warning} \begin{remark} - There are two ways to interpret the interaction between a stopping time $T$ - and a stochastic process $(X_n)_n$. + There are two ways to look at the interaction between a stopping time $T$ + and a stochastic process $(X_n)_n$: \begin{itemize} - \item The behaviour of $ X_n$ until $T$, - i.e.~looking at the \vocab{stopped process} + \item The behaviour of $ X_n$ until $T$, i.e. \[ X^T \coloneqq \left(X_{T \wedge n}\right)_{n \in \N} - \]. + \] + is called the \vocab{stopped process}. \item The value of $(X_n)_n)$ at time $T$, i.e.~looking at $X_T$. \end{itemize} \end{remark} \begin{example} If we look at a process - \[ - S_n = \sum_{i=1}^{n} X_i - \] - for some $(X_n)_n$, then - \[ - S^T = (\sum_{i=1}^{T \wedge n} X_i)_n - \] + \[ S_n = \sum_{i=1}^{n} X_i \] + for some $(X_n)_n$, + then + \[ S^T = (\sum_{i=1}^{T \wedge n} X_i)_n \] and - \[ - S_T = \sum_{i=1}^{T} X_i. - \] + \[ S_T = \sum_{i=1}^{T} X_i. \] \end{example} \begin{theorem} @@ -242,7 +235,6 @@ we need the following theorem, which we won't prove here: = 0 \text{ if $(X_n)_n$ is a martingale}. \end{cases} \end{IEEEeqnarray*} - \end{proof} \begin{remark} @@ -256,7 +248,6 @@ we need the following theorem, which we won't prove here: = \bE[X_0] & \text{ martingale}. \end{cases} \] - However if $T$ is not bounded, this does not hold in general. \end{remark} \begin{example} @@ -291,7 +282,7 @@ we need the following theorem, which we won't prove here: $\bE[X_T] = \bE[X_0]$. \end{theorem} \begin{proof} - (i) was dealt with in \autoref{roptionalstoppingi}. + (i) was already done in \autoref{roptionalstoppingi}. (ii): Since $(X_n)_n$ is bounded, we get that \begin{IEEEeqnarray*}{rCl} @@ -312,7 +303,6 @@ we need the following theorem, which we won't prove here: \end{IEEEeqnarray*} Thus, we can apply (ii). - The statement about martingales follows from applying this to $(X_n)_n$ and $(-X_n)_n$, which are both supermartingales. diff --git a/inputs/lecture_22.tex b/inputs/lecture_22.tex index ad0d174..2693348 100644 --- a/inputs/lecture_22.tex +++ b/inputs/lecture_22.tex @@ -1,4 +1,4 @@ -\lecture{22}{2023-07-04}{Intro Markov Chains II} +\lecture{22}{2023-07-04}{Introduction Markov Chains II} \begin{goal} We want to start with the basics of the theory of Markov chains. \end{goal} diff --git a/inputs/lecture_5.tex b/inputs/lecture_5.tex index b2f53e1..6a3ddc3 100644 --- a/inputs/lecture_5.tex +++ b/inputs/lecture_5.tex @@ -1,5 +1,5 @@ \lecture{5}{2023-04-21}{} -\subsection{The laws of large numbers} +\subsection{The Laws of Large Numbers} We want to show laws of large numbers: diff --git a/inputs/lecture_6.tex b/inputs/lecture_6.tex index ed4dd95..c42c22d 100644 --- a/inputs/lecture_6.tex +++ b/inputs/lecture_6.tex @@ -1,3 +1,4 @@ +\lecture{6}{}{} \todo{Large parts of lecture 6 are missing} \begin{refproof}{lln} We want to deduce the SLLN (\autoref{lln}) from \autoref{thm2}. 
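The fact on combining stopping times in lecture_20.tex above is stated without proof. A one-line verification that could be added there, a sketch using only the defining property $\{T \le n\} \in \cF_n$ and the notes' macros ($\cF_n$, $\N$):

\begin{remark}
    For stopping times $T_1, T_2$ and every $n \in \N$,
    \[
        \{\min \{T_1, T_2\} \le n\} = \{T_1 \le n\} \cup \{T_2 \le n\} \in \cF_n
        \quad \text{and} \quad
        \{\max \{T_1, T_2\} \le n\} = \{T_1 \le n\} \cap \{T_2 \le n\} \in \cF_n,
    \]
    so both are stopping times.
    The difference $T_1 - T_2$ need not be one:
    it can be negative, and deciding $\{T_1 - T_2 \le n\}$
    requires knowledge of both times,
    which is in general not contained in $\cF_n$.
\end{remark}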
diff --git a/inputs/lecture_7.tex b/inputs/lecture_7.tex index 6bb24c9..4bcb241 100644 --- a/inputs/lecture_7.tex +++ b/inputs/lecture_7.tex @@ -1,9 +1,10 @@ -% TODO \begin{goal} -% TODO We want to drop our assumptions on finite mean or variance -% TODO and say something about the behaviour of $ \sum_{n \ge 1} X_n$ -% TODO when the $X_n$ are independent. -% TODO \end{goal} -\begin{theorem}[Theorem 3, Kolmogorov's three-series theorem] % Theorem 3 +\lecture{7}{}{Kolmogorov's three series theorem} +\begin{goal} + We want to drop our assumptions on finite mean or variance + and say something about the behaviour of $ \sum_{n \ge 1} X_n$ + when the $X_n$ are independent. +\end{goal} +\begin{theorem}[Kolmogorov's three-series theorem] % Theorem 3 \label{thm3} Let $X_n$ be a family of independent random variables. \begin{enumerate}[(a)] @@ -20,7 +21,7 @@ \end{enumerate} \end{theorem} For the proof we'll need a slight generalization of \autoref{thm2}: -\begin{theorem}[Theorem 4] % Theorem 4 +\begin{theorem} %[Theorem 4] \label{thm4} Let $\{X_n\}_n$ be independent and \vocab{uniformly bounded} (i.e. $\exists M < \infty : \sup_n \sup_\omega |X_n(\omega)| \le M$). @@ -166,14 +167,13 @@ More formally: However \[ \sum_{n} X_n \frac{1}{n^{\frac{1}{2} + \epsilon}} - \] + \] where $\bP[X_n = 1] = \bP[X_n = -1] = \frac{1}{2}$ converges almost surely for all $\epsilon > 0$. And \[ \sum_{n} X_n \frac{1}{n^{\frac{1}{2} - \epsilon}} - \] + \] does not converge. - \end{example} diff --git a/inputs/lecture_8.tex b/inputs/lecture_8.tex index c47f95e..a8c2d85 100644 --- a/inputs/lecture_8.tex +++ b/inputs/lecture_8.tex @@ -24,7 +24,7 @@ of sequences of random variables. is again a $\sigma$-algebra, $\cT$ is indeed a $\sigma$-algebra. \item We have \[ - \cT = \{A \in \cF ~|~ \forall i ~ \exists B \in \cB(\R)^{\otimes \N} : A = \{\omega | (X_i(\omega), X_{i+1}(\omega), \ldots) \in B\} \}. % TODO? + \cT = \{A \in \cF ~|~ \forall i ~ \exists B \in \cB(\R)^{\otimes \N} : A = \{\omega | (X_i(\omega), X_{i+1}(\omega), \ldots) \in B\} \}. \] \end{enumerate} \end{remark} @@ -146,5 +146,3 @@ for any $k \in \N$. \] hence $\bP[T] \in \{0,1\}$. \end{refproof} - - diff --git a/inputs/lecture_9.tex b/inputs/lecture_9.tex index f936429..15f6e60 100644 --- a/inputs/lecture_9.tex +++ b/inputs/lecture_9.tex @@ -1,6 +1,6 @@ +\lecture{9}{}{Percolation, Introduction to characteristic functions} \subsubsection{Application: Percolation} - We will now discuss another application of Kolmogorov's $0-1$-law, percolation. \begin{definition}[\vocab{Percolation}] @@ -41,7 +41,7 @@ For $d > 2$ this is unknown. We'll get back to percolation later. -\section{Characteristic functions, weak convergence and the central limit theorem} +\section{Characteristic Functions, Weak Convergence and the Central Limit Theorem} % Characteristic functions are also known as the \vocab{Fourier transform}. %Weak convergence is also known as \vocab{convergence in distribution} / \vocab{convergence in law}. @@ -77,7 +77,7 @@ This will be the weakest notion of convergence, hence it is called \vocab{weak convergence}. This notion of convergence will be defined in terms of characteristic functions of Fourier transforms. -\subsection{Characteristic functions and Fourier transform} +\subsection{Characteristic Functions and Fourier Transform} \begin{definition} Consider $(\R, \cB(\R), \bP)$. @@ -152,4 +152,3 @@ We will prove this later. $F(b) - F(a_n) = G(b) - G(a_n)$ hence $F(b) = G(b)$. Since $F$ and $G$ are right-continuous, it follows that $F = G$. 
\end{refproof} - diff --git a/inputs/prerequisites.tex b/inputs/prerequisites.tex index 5d2ed8f..c51bf7c 100644 --- a/inputs/prerequisites.tex +++ b/inputs/prerequisites.tex @@ -1,7 +1,7 @@ -% This section provides a short recap of things that should be known -% from the lecture on stochastics. +This section provides a short recap of things that should be known +from the lecture on stochastics. -\subsection{Notions of convergence} +\subsection{Notions of Convergence} \begin{definition} Fix a probability space $(\Omega,\cF,\bP)$. Let $X, X_1, X_2,\ldots$ be random variables. @@ -147,7 +147,29 @@ The first thing that should come to mind is: We used Chebyshev's inequality. Linearity of $\bE$, $\Var(cX) = c^2\Var(X)$ and $\Var(X_1 +\ldots + X_n) = \Var(X_1) + \ldots + \Var(X_n)$ for independent $X_i$. - - -Modes of covergence: $L^p$, in probability, a.s. \fi + +\subsection{Some Facts from Measure Theory} \begin{fact}+[Finite measures are {\vocab[Measure]{regular}}, Exercise 3.1] Let $\mu$ be a finite measure on $(\R, \cB(\R))$. Then for all $\epsilon > 0$, there exists a compact set $K \in \cB(\R)$ such that $\mu(K) > \mu(\R) - \epsilon$. \end{fact} \begin{proof} We have $[-k,k] \uparrow \R$, hence $\mu([-k,k]) \uparrow \mu(\R)$. \end{proof} + +\begin{theorem}[Riemann-Lebesgue] \label{riemann-lebesgue} Let $f: \R \to \R$ be integrable. Then \[ \lim_{n \to \infty} \int_{\R} f(x) \cos(n x) \lambda(\dif x) = 0. \] \end{theorem} + + + + + diff --git a/probability_theory.tex b/probability_theory.tex index 44dc92d..300a893 100644 --- a/probability_theory.tex +++ b/probability_theory.tex @@ -1,4 +1,4 @@ -\documentclass[10pt,a4paper, fancyfoot, git, english]{mkessler-script} +\documentclass[fancyfoot, git, english]{mkessler-script} \course{Probability Theory} \lecturer{Prof.~Chiranjib Mukherjee} @@ -50,8 +50,10 @@ \cleardoublepage -%\backmatter -%\chapter{Appendix} +\begin{landscape} +\section{Appendix} +\input{inputs/a_0_distributions.tex} +\end{landscape} \cleardoublepage \printvocabindex diff --git a/wtheo.sty b/wtheo.sty index c3e5b72..a02953b 100644 --- a/wtheo.sty +++ b/wtheo.sty @@ -11,6 +11,7 @@ \usepackage[normalem]{ulem} \usepackage{pdflscape} \usepackage{longtable} +\usepackage{colortbl} \usepackage{xcolor} \usepackage{dsfont} \usepackage{csquotes} @@ -98,9 +99,15 @@ \NewFancyTheorem[thmtools = { style = thmredmargin} , group = { big } ]{warning} \DeclareSimpleMathOperator{Var} -\DeclareSimpleMathOperator{Bin} -\DeclareSimpleMathOperator{Ber} -\DeclareSimpleMathOperator{Exp} +\DeclareSimpleMathOperator{Bin} % binomial distribution +\DeclareSimpleMathOperator{Geo} % geometric distribution +\DeclareSimpleMathOperator{Poi} % Poisson distribution + +\DeclareSimpleMathOperator{Unif} % uniform distribution +\DeclareSimpleMathOperator{Exp} % exponential distribution +\DeclareSimpleMathOperator{Cauchy} % Cauchy distribution +% \DeclareSimpleMathOperator{Normal} % normal distribution + \newcommand*\dif{\mathop{}\!\mathrm{d}} \newcommand\lecture[3]{\hrule{\color{darkgray}\hfill{\tiny[Lecture #1, #2]}}}
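An example that could accompany the tightness definition added in lecture_13.tex above (a hedged sketch, not the content of the referenced Exercise 8.1; it uses only Markov's inequality and the notes' macros $\bP$, $\bE$, $\N$, with $C$ an illustrative constant): sequences with uniformly bounded first moments are tight.

\begin{example}
    Suppose $C \coloneqq \sup_{n \in \N} \bE[|X_n|] < \infty$.
    Then by Markov's inequality,
    \[
        \sup_{n \in \N} \bP[|X_n| > a] \le \frac{C}{a} \xrightarrow{a \to \infty} 0,
    \]
    i.e.~the distribution of $(X_n)_n$ is tight.
\end{example}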
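Similarly, a quick sanity check that could follow the Riemann--Lebesgue theorem added to prerequisites.tex (a sketch; the reduction from general integrable $f$ to step functions is only indicated, and $g$ denotes an approximating step function):

\begin{example}
    For $f = \One_{[a,b]}$ with $a < b$ we get
    \[
        \int_\R \One_{[a,b]}(x) \cos(n x) \lambda(\dif x)
        = \int_a^b \cos(n x) \dif x
        = \frac{\sin(n b) - \sin(n a)}{n}
        \xrightarrow{n \to \infty} 0.
    \]
    By linearity the claim holds for step functions,
    and for general integrable $f$ it follows by choosing step functions $g$
    with $\|f - g\|_{L^1}$ small, since
    $\left| \int_\R (f - g)(x) \cos(n x) \lambda(\dif x) \right| \le \|f - g\|_{L^1}$.
\end{example}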