\lecture{14}{2023-05-25}{Conditional expectation} \section{Conditional Expectation} \subsection{Introduction} Consider a probability space $(\Omega, \cF, \bP)$ and two events $A, B \in \cF$ with $\bP(B) > 0$. \begin{definition} The \vocab{conditional probability} of $A$ given $B$ is defined as \[ \bP(A | B) \coloneqq \frac{\bP(A \cap B)}{\bP(B)}. \] \end{definition} Suppose we have two random variables $X$ and $Y$ on $\Omega$, such that $X$ takes distinct values $x_1, x_2,\ldots, x_{m}$ and $Y$ takes distinct values $y_1,\ldots, y_n$. Then for this case, define the \vocab{conditional expectation} of $X$ given $Y = y_j$ as \[ \bE[X | Y = y_j] \coloneqq \sum_{i=1}^m x_i \bP[X=x_i | Y = y_j]. \] The random variable $Z = \bE[X | Y]$ is defined as follows: If $Y(\omega) = y_j$ then \[ Z(\omega) \coloneqq \underbrace{\bE[X | Y = y_j]}_{\text{\reflectbox{$\coloneqq$}} z_j}. \] Note that $\Omega_j \coloneqq \{\omega : Y(\omega) = y_j\}$ defines a partition of $\Omega$ and on each $\Omega_j$ (``the $j^{\text{th}}$ $Y$-atom'') $ Z$ is constant. Let $\cG \coloneqq \sigma(Y)$. Then $Z$ is measurable with respect to $\cG$. Furthermore \begin{IEEEeqnarray*}{rCl} \int_{\{Y = y_j\} } Z \dif \bP &=& z_j \int_{\{Y = y_j\}} \dif \bP\\ &=& z_j \bP[Y=y_j]\\ &=&\sum_{i=1}^m x_i \bP[X = x_i | Y = y_j] \bP[Y = y_j]\\ &=&\sum_{i=1}^m x_i \bP[X = x_i, Y = y_j]\\ &=& \int_{\{Y = y_j\}} X \dif \bP. \end{IEEEeqnarray*} Hence \[ \int_{G} Z \dif \bP = \int_{G} X \dif \bP \] for all $G \in \cG$. We now want to generalize this to arbitrary random variables. \begin{theorem} \label{conditionalexpectation} Let $(\Omega, \cF, \bP)$ be a probability space, $X \in L^1(\bP)$ and $\cG \subseteq \cF$ a sub-$\sigma$-algebra. Then there exists a random variable $Z$ such that \begin{enumerate}[(a)] \item $Z$ is $\cG$-measurable and $Z \in L^1(\bP)$, \item $\int_G Z \dif \bP = \int_G X \dif \bP$ for all $G \in \cG$. 
\end{enumerate} Such a $Z$ is unique up to sets of measure $0$ and is called the \vocab{conditional expectation} of $X$ given the $\sigma$-algebra $\cG$ and written $Z = \bE[X | \cG]$. \end{theorem} \begin{remark} Suppose $\cG = \{\emptyset, \Omega\}$, then \[ \bE[X | \cG] = (\omega \mapsto \bE[X]) \] is a constant random variable. \end{remark} \begin{definition}[Conditional probability] Let $A \subseteq \Omega$ be an event and $\cG \subseteq \cF$ a sub-$\sigma$-algebra. We define the \vocab{conditional probability} of $A$ given $\cG$ by \[ \bP[A | \cG] \coloneqq \bE[\One_A | \cG]. \] \end{definition} \subsection{Existence of Conditional Probability} We will give two different proofs of \yaref{conditionalexpectation}. The first one will use orthogonal projections. The second will use the Radon--Nikodym theorem. We'll first do the easy proof, derive some properties and then do the harder proof. \begin{lemma} \label{orthproj} Suppose $H$ is a \vocab{Hilbert space}, i.e.~$H$ is a vector space with an inner product $\langle \cdot, \cdot \rangle_H$ which defines a norm by $\|x\|_H^2 = \langle x, x\rangle_H$ making $H$ a complete metric space. For any $x \in H$ and closed, convex subspace $K \subseteq H$, there exists a unique $z \in K$ such that the following equivalent conditions hold: \begin{enumerate}[(a)] \item $\forall y \in K : \langle x-z, y\rangle_H = 0$, \item $\forall y \in K: \|z-x\|_H \le \|y-x\|_H$. \end{enumerate} \end{lemma} \begin{proof} \notes \end{proof} \begin{refproof}{conditionalexpectation} Almost sure uniqueness of $Z$: Suppose $X \in L^1$ and $Z$ and $Z'$ satisfy (a) and (b). We need to show that $\bP[Z \neq Z'] = 0$. By (a), we have $Z, Z' \in L^1(\Omega, \cG, \bP)$. By (b), $\bE[(Z - Z') \One_G] = 0$ for all $G \in \cG$. Assume that $\bP[Z > Z'] > 0$. Since $\{Z > Z' + \frac{1}{n}\} \uparrow \{Z > Z'\}$, we see that $\bP[Z > Z' + \frac{1}{n}] > 0$ for some $n$. 
However $\{Z > Z' + \frac{1}{n}\} \in \cG$, which is a contradiction, since \[ \bE[(Z - Z') \One_{Z - Z' > \frac{1}{n}}] \ge \frac{1}{n} \bP[ Z - Z' > \frac{1}{n}] > 0. \] Interchanging the roles of $Z$ and $Z'$ yields $\bP[Z' > Z] = 0$ as well, hence $Z = Z'$ almost surely. \bigskip Existence of $\bE(X | \cG)$ for $X \in L^2$: Let $H = L^2(\Omega, \cF, \bP)$ and $K = L^2(\Omega, \cG, \bP)$. $K$ is closed, since a pointwise limit of $\cG$-measurable functions is $\cG$-measurable (if it exists). By \yaref{orthproj}, there exists $Z \in K$ such that \[\bE[(X - Z)^2] = \inf \{ \bE[(X- W)^2] ~|~ W \in L^2(\cG)\}\] and \begin{equation} \forall Y \in L^2(\cG) : \langle X - Z, Y\rangle = 0. \label{lec13_boxcond} \end{equation} Now, if $G \in \cG$, then $Y \coloneqq \One_G \in L^2(\cG)$ and by \eqref{lec13_boxcond} $\bE[Z \One_G] = \bE[X \One_G]$. \bigskip Existence of $\bE(X | \cG)$ for $X \in L^1$: Let $X = X^+ - X^-$. It suffices to show (a) and (b) for $X^+$ (and analogously for $X^-$), so we may assume $X \ge 0$. Choose bounded random variables $X_n \ge 0$ such that $X_n \uparrow X$. Since each $X_n \in L^2$, we can choose a version $Z_n$ of $\bE(X_n | \cG)$. \begin{claim} $0 \overset{\text{a.s.}}{\le} Z_n \uparrow$. \end{claim} \begin{subproof} \notes \end{subproof} Define $Z(\omega) \coloneqq \limsup_{n \to \infty} Z_n(\omega)$. Then $Z$ is $\cG$-measurable and since $Z_n \uparrow Z$, by the \yaref{cmct}, $\bE(Z \One_G) = \bE(X \One_G)$ for all $G \in \cG$. \end{refproof}