diff --git a/inputs/lecture_1.tex b/inputs/lecture_1.tex
index 10f049f..3de936a 100644
--- a/inputs/lecture_1.tex
+++ b/inputs/lecture_1.tex
@@ -46,7 +46,7 @@ First, let us recall some basic definitions:
 \end{fact}
 The converse to this fact is also true:
 \begin{theorem}[Kolmogorov's existence theorem / basic existence theorem of probability theory]
-    \label{kolmogorovxistence}
+    \label{kolmogorovexistence}
     Let $\cF(\R)$ be the set of all distribution functions on $\R$
     and let $\cM(\R)$ be the set of all probability measures on $\R$.
     Then there is a one-to-one correspondence between $\cF(\R)$ and $\cM(\R)$
diff --git a/inputs/lecture_14.tex b/inputs/lecture_14.tex
new file mode 100644
index 0000000..a5b9a4a
--- /dev/null
+++ b/inputs/lecture_14.tex
@@ -0,0 +1,173 @@
+\lecture{14}{2023-05-25}{Conditional expectation}
+
+\section{Conditional expectation}
+
+\subsection{Introduction}
+
+Consider a probability space $(\Omega, \cF, \bP)$
+and two events $A, B \in \cF$ with $\bP(B) > 0$.
+
+\begin{definition}
+    The \vocab{conditional probability} of $A$ given $B$ is defined as
+    \[
+        \bP(A | B) \coloneqq \frac{\bP(A \cap B)}{\bP(B)}.
+    \]
+\end{definition}
+
+Suppose we have two random variables $X$ and $Y$ on $\Omega$,
+such that $X$ takes distinct values $x_1, x_2, \ldots, x_m$
+and $Y$ takes distinct values $y_1, \ldots, y_n$.
+In this case, define the \vocab{conditional expectation}
+of $X$ given $Y = y_j$ as
+\[
+\bE[X | Y = y_j] \coloneqq \sum_{i=1}^m x_i \bP[X = x_i | Y = y_j].
+\]
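+
+% Editorial sketch: a small worked instance of the formula above;
+% the example (two fair coin tosses) is illustrative and not part of
+% the original lecture.
+\begin{remark}
+    For example, toss two fair coins, let $X$ be the total number of heads
+    and let $Y$ be the indicator of the event that the first toss shows heads.
+    Then
+    \[
+    \bE[X | Y = 1] = 1 \cdot \tfrac{1}{2} + 2 \cdot \tfrac{1}{2} = \tfrac{3}{2},
+    \qquad
+    \bE[X | Y = 0] = 0 \cdot \tfrac{1}{2} + 1 \cdot \tfrac{1}{2} = \tfrac{1}{2}.
+    \]
+\end{remark}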
+
+The random variable $Z = \bE[X | Y]$
+is defined as follows:
+if $Y(\omega) = y_j$, then
+\[
+Z(\omega) \coloneqq \underbrace{\bE[X | Y = y_j]}_{\text{\reflectbox{$\coloneqq$}} z_j}.
+\]
+Note that $\Omega_j \coloneqq \{\omega : Y(\omega) = y_j\}$
+defines a partition of $\Omega$, and on each $\Omega_j$
+(``the $j^{\text{th}}$ $Y$-atom'')
+$Z$ is constant.
+
+Let $\cG \coloneqq \sigma(Y)$.
+Then $Z$ is measurable with respect to $\cG$.
+Furthermore,
+\begin{IEEEeqnarray*}{rCl}
+    \int_{\{Y = y_j\}} Z \dif \bP &=& z_j \int_{\{Y = y_j\}} \dif \bP\\
+    &=& z_j \bP[Y = y_j]\\
+    &=& \sum_{i=1}^m x_i \bP[X = x_i | Y = y_j] \bP[Y = y_j]\\
+    &=& \sum_{i=1}^m x_i \bP[X = x_i, Y = y_j]\\
+    &=& \int_{\{Y = y_j\}} X \dif \bP.
+\end{IEEEeqnarray*}
+Hence
+\[
+\int_{G} Z \dif \bP = \int_{G} X \dif \bP
+\]
+for all $G \in \cG$, since every such $G$ is a disjoint union of $Y$-atoms.
+
+We now want to generalize this to arbitrary random variables.
+\begin{theorem}
+    \label{conditionalexpectation}
+    Let $(\Omega, \cF, \bP)$ be a probability space, $X \in L^1(\bP)$
+    and $\cG \subseteq \cF$ a sub-$\sigma$-algebra.
+    Then there exists a random variable $Z$
+    such that
+    \begin{enumerate}[(a)]
+        \item $Z$ is $\cG$-measurable and $Z \in L^1(\bP)$,
+        \item $\int_G Z \dif \bP = \int_G X \dif \bP$
+            for all $G \in \cG$.
+    \end{enumerate}
+
+    Such a $Z$ is unique up to sets of measure $0$; it is
+    called the \vocab{conditional expectation} of $X$ given
+    the $\sigma$-algebra $\cG$ and written
+    $Z = \bE[X | \cG]$.
+\end{theorem}
+\begin{remark}
+    Suppose $\cG = \{\emptyset, \Omega\}$.
+    Then
+    \[
+    \bE[X | \cG] = (\omega \mapsto \bE[X])
+    \]
+    is a constant random variable.
+\end{remark}
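+
+% Editorial sketch: one step beyond the trivial case; this example is
+% not part of the original lecture, but checks properties (a) and (b)
+% on the smallest nontrivial sub-$\sigma$-algebra.
+\begin{remark}
+    Similarly, if $\cG = \sigma(B) = \{\emptyset, B, B^c, \Omega\}$
+    for some $B \in \cF$ with $0 < \bP(B) < 1$, then
+    \[
+    \bE[X | \cG]
+    = \frac{\bE[X \One_B]}{\bP(B)} \One_B
+    + \frac{\bE[X \One_{B^c}]}{\bP(B^c)} \One_{B^c}.
+    \]
+    This is constant on each of the atoms $B$ and $B^c$,
+    hence $\cG$-measurable, and integrating over
+    $\emptyset$, $B$, $B^c$ and $\Omega$ verifies (b).
+\end{remark}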
+
+\paragraph{Plan}
+We will give two different proofs of \autoref{conditionalexpectation}.
+The first one will use orthogonal projections.
+The second will use the Radon-Nikodym theorem.
+We'll first do the easy proof, derive some properties
+and then do the harder proof.
+
+\begin{lemma}
+    \label{orthproj}
+    Suppose $H$ is a \vocab{Hilbert space},
+    i.e.~$H$ is a vector space with an inner product $\langle \cdot, \cdot \rangle_H$,
+    which defines a norm by $\|x\|_H^2 = \langle x, x \rangle_H$,
+    making $H$ a complete metric space.
+
+    For any $x \in H$ and any closed linear subspace $K \subseteq H$,
+    there exists a unique $z \in K$ such that the following equivalent conditions hold:
+    \begin{enumerate}[(a)]
+        \item $\forall y \in K : \langle x - z, y \rangle_H = 0$,
+        \item $\forall y \in K : \|z - x\|_H \le \|y - x\|_H$.
+    \end{enumerate}
+\end{lemma}
+\begin{proof}
+    \todo{Notes}
+\end{proof}
+
+\begin{refproof}{conditionalexpectation}
+
+    Almost sure uniqueness of $Z$:
+
+    Suppose $X \in L^1$ and that $Z$ and $Z'$ both satisfy (a) and (b).
+    We need to show that $\bP[Z \neq Z'] = 0$.
+    By (a), we have $Z, Z' \in L^1(\Omega, \cG, \bP)$.
+    By (b), $\bE[(Z - Z') \One_G] = 0$ for all $G \in \cG$.
+
+    Assume that $\bP[Z > Z'] > 0$.
+    Since $\{Z > Z' + \frac{1}{n}\} \uparrow \{Z > Z'\}$,
+    we see that $\bP[Z > Z' + \frac{1}{n}] > 0$ for some $n$.
+    However, $\{Z > Z' + \frac{1}{n}\} \in \cG$,
+    which is a contradiction, since
+    \[
+    \bE[(Z - Z') \One_{Z - Z' > \frac{1}{n}}] \ge \frac{1}{n} \bP[Z - Z' > \frac{1}{n}] > 0.
+    \]
+    By symmetry, also $\bP[Z' > Z] = 0$, hence $Z = Z'$ almost surely.
+
+    \bigskip
+    Existence of $\bE(X | \cG)$ for $X \in L^2$:
+
+    Let $H = L^2(\Omega, \cF, \bP)$
+    and $K = L^2(\Omega, \cG, \bP)$.
+
+    $K$ is closed: if $W_n \to W$ in $L^2$, then some subsequence
+    converges almost surely, and a pointwise limit of $\cG$-measurable
+    functions is $\cG$-measurable (where it exists).
+    By \autoref{orthproj},
+    there exists $Z \in K$ such that
+    \[
+    \bE[(X - Z)^2] = \inf \{\bE[(X - W)^2] ~|~ W \in L^2(\cG)\}
+    \]
+    and
+    \begin{equation}
+        \forall Y \in L^2(\cG) : \langle X - Z, Y \rangle = 0.
+        \label{lec13_boxcond}
+    \end{equation}
+    Now, if $G \in \cG$, then $Y \coloneqq \One_G \in L^2(\cG)$,
+    and by \eqref{lec13_boxcond}, $\bE[Z \One_G] = \bE[X \One_G]$.
+
+    \bigskip
+    Existence of $\bE(X | \cG)$ for $X \in L^1$:
+
+    Let $X = X^+ - X^-$.
+    It suffices to show (a) and (b) for $X^+$ and $X^-$,
+    so we may assume $X \ge 0$.
+    Choose bounded random variables $X_n \ge 0$ such that $X_n \uparrow X$.
+    Since each $X_n \in L^2$, we can choose a version $Z_n$ of $\bE(X_n | \cG)$.
+
+    \begin{claim}
+        $0 \overset{\text{a.s.}}{\le} Z_n \uparrow$.
+    \end{claim}
+    \begin{subproof}
+        \todo{Notes}
+    \end{subproof}
+
+    Define $Z(\omega) \coloneqq \limsup_{n \to \infty} Z_n(\omega)$.
+    Then $Z$ is $\cG$-measurable, and since $Z_n \uparrow Z$,
+    the MCT yields $\bE(Z \One_G) = \bE(X \One_G)$ for all $G \in \cG$.
+\end{refproof}
diff --git a/inputs/lecture_2.tex b/inputs/lecture_2.tex
index 5360dc4..1c38d36 100644
--- a/inputs/lecture_2.tex
+++ b/inputs/lecture_2.tex
@@ -1,3 +1,4 @@
+\lecture{2}{}{}
 \section{Independence and product measures}
 
 In order to define the notion of independence, we first need to construct
diff --git a/inputs/lecture_3.tex b/inputs/lecture_3.tex
index 2e8c762..8708d78 100644
--- a/inputs/lecture_3.tex
+++ b/inputs/lecture_3.tex
@@ -1,3 +1,4 @@
+\lecture{3}{}{}
 \todo{Lecture 3 needs to be finished}
 \begin{notation}
     Let $\cB_n$ denote $\cB(\R^n)$.
diff --git a/inputs/lecture_5.tex b/inputs/lecture_5.tex
index 5364619..b2f53e1 100644
--- a/inputs/lecture_5.tex
+++ b/inputs/lecture_5.tex
@@ -1,4 +1,4 @@
-% Lecture 5 2023-04-21
+\lecture{5}{2023-04-21}{}
 
 \subsection{The laws of large numbers}
 
diff --git a/probability_theory.tex b/probability_theory.tex
index 9ab7263..0b93608 100644
--- a/probability_theory.tex
+++ b/probability_theory.tex
@@ -37,6 +37,7 @@
 \input{inputs/lecture_11.tex}
 \input{inputs/lecture_12.tex}
 \input{inputs/lecture_13.tex}
+\input{inputs/lecture_14.tex}
 
 \cleardoublepage
diff --git a/wtheo.sty b/wtheo.sty
index e42ca30..1aab83a 100644
--- a/wtheo.sty
+++ b/wtheo.sty
@@ -103,3 +103,4 @@
 \DeclareSimpleMathOperator{Exp}
 
 \newcommand*\dif{\mathop{}\!\mathrm{d}}
+\newcommand\lecture[3]{{\color{gray}\hfill Lecture #1 (#2)}}
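+% Editorial note on usage: \lecture{<number>}{<date>}{<topic>}.
+% With this definition the third argument (the topic) is read but not
+% typeset, and an empty date (e.g.~\lecture{2}{}{}) renders as ``()''.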