\pagebreak
\lecture{22}{2023-07-04}{Introduction Markov Chains II}
\section{Markov Chains}
\todo{Merge this with the end of lecture 21}
\begin{goal}
  We want to start with the basics of the theory of Markov chains.
\end{goal}
\begin{example}[Markov chains with two states]
  Suppose there are two states of a phone line, $0$, ``free'', or $1$, ``busy''.
  We assume that the state only changes at discrete units of time
  and model this as a sequence of random variables.
  Assume
  \begin{IEEEeqnarray*}{rCl}
    \bP[X_{n+1} = 0 | X_n = 0] &=& p\\
    \bP[X_{n+1} = 1 | X_n = 0] &=& (1-p)\\
    \bP[X_{n+1} = 0 | X_n = 1] &=& q\\
    \bP[X_{n+1} = 1 | X_n = 1] &=& (1-q)
  \end{IEEEeqnarray*}
  for some $p,q \in (0,1)$.
  We can write this as a matrix
  \begin{IEEEeqnarray*}{rCl}
    P &=& \begin{pmatrix}
      p & (1-p) \\
      q & (1-q)
    \end{pmatrix}
  \end{IEEEeqnarray*}
  Note that the rows of this matrix sum up to $1$.
  Additionally, we make the following assumption:
  Given that at some time $n$ the phone is in state $i \in \{0,1\}$,
  the behavior of the phone after time $n$ does not depend on the way
  the phone reached state $i$.
  \begin{question}
    Suppose $X_0 = 0$.
    What is the probability that the phone will be free at times $1$ and $2$
    and will become busy at time $3$,
    i.e.~what is $\bP[X_1 = 0, X_2 = 0, X_3 = 1]$?
  \end{question}
  We have
  \begin{IEEEeqnarray*}{rCl}
    &&\bP[X_1 = 0, X_2 = 0, X_3 = 1]\\
    &=& \bP[X_3 = 1 | X_2 = 0, X_1 = 0] \bP[X_2 = 0, X_1 = 0]\\
    &=& \bP[X_3 = 1 | X_2 = 0] \bP[X_2 = 0, X_1 = 0]\\
    &=& \bP[X_3 = 1 | X_2 = 0] \bP[X_2 = 0 | X_1 = 0] \bP[X_1 = 0]\\
    &=& P_{0,1} P_{0,0} P_{0,0},
  \end{IEEEeqnarray*}
  where the second equality uses the assumption
  that the behavior after time $2$ depends only on the state at time $2$.
  \begin{question}
    Assume $X_0 = 0$. What is $\bP[X_3 = 1]$?
  \end{question}
  For $\{X_3 = 1\}$ to happen, we decompose into the following disjoint events:
  \begin{IEEEeqnarray*}{rCl}
    \bP(\{X_3 = 1, X_2 = 0, X_1 = 0\}) &=& P_{0,1} P_{0,0}^2,\\
    \bP(\{X_3 = 1, X_2 = 0, X_1 = 1\}) &=& P_{0,1}^2 P_{1,0},\\
    \bP(\{X_3 = 1, X_2 = 1, X_1 = 0\}) &=& P_{0,0} P_{0,1} P_{1,1},\\
    \bP(\{X_3 = 1, X_2 = 1, X_1 = 1\}) &=& P_{0,1} P_{1,1}^2,
  \end{IEEEeqnarray*}
  and $\bP[X_3 = 1]$ is the sum of these four probabilities.

  More generally, consider a matrix $P \in [0,1]^{N \times N}$
  whose rows sum up to $1$.
  Then we get a Markov chain with $N$ states by defining
  \[
    \bP[X_{n+1} = j | X_{n} = i] = P_{i,j}.
  \]
\end{example}
\begin{definition}
  Let $E$ denote a \vocab{discrete state space},
  usually $E = \{1,\ldots, N\}$ or $E = \N$ or $E = \Z$.
  Let $\alpha$ be a probability measure on $E$.
  We say that $(p_{i,j})_{i \in E, j \in E}$
  is a \vocab{transition probability matrix},
  if
  \[
    \forall i,j \in E .~p_{i,j} \ge 0
    \quad \text{and} \quad
    \forall i \in E .~\sum_{j \in E} p_{i,j} = 1.
  \]
  Given a triplet $(E, \alpha, P)$,
  we say that a stochastic process $(X_n)_{n \ge 0}$,
  i.e.~$X_n: \Omega \to E$,
  is a \vocab[Markov chain!discrete]%
  {Markov chain taking values on the state space $E$ with initial distribution $\alpha$ and transition probability matrix $P$},
  if the following conditions hold:
  \begin{enumerate}[(i)]
    \item $\bP[X_0 = i] = \alpha(i)$ for all $i \in E$,
    \item \begin{IEEEeqnarray*}{rCl}
        &&\bP[X_{n+1} = i_{n+1} | X_0 = i_0, X_1 = i_1, \ldots, X_{n} = i_{n}]\\
        &=& \bP[X_{n+1} = i_{n+1} | X_n = i_n]
      \end{IEEEeqnarray*}
      for all $n \in \N_0$ and $i_0,\ldots, i_{n+1} \in E$
      (provided $\bP[X_0 = i_0, X_1 = i_1, \ldots, X_n = i_n] \neq 0$).
  \end{enumerate}
\end{definition}
\begin{fact}
  For all $n \in \N_0$ and $i_0,\ldots,i_n \in E$, we have
  \[
    \bP[X_0 = i_0, X_1 = i_1, \ldots, X_n = i_n]
    = \alpha(i_0) \cdot p_{i_0,i_1} \cdot p_{i_1,i_2} \cdot \ldots \cdot p_{i_{n-1}, i_n}.
  \]
\end{fact}
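As a quick sanity check of this product formula
(with hypothetical values $p = 0.9$, $q = 0.6$,
so $P_{0,0} = 0.9$, $P_{0,1} = 0.1$, $P_{1,0} = 0.6$, $P_{1,1} = 0.4$,
and $X_0 = 0$),
the four path probabilities from the phone example evaluate to
\begin{IEEEeqnarray*}{rCl}
  \bP[X_3 = 1] &=& P_{0,1} P_{0,0}^2 + P_{0,1}^2 P_{1,0} + P_{0,0} P_{0,1} P_{1,1} + P_{0,1} P_{1,1}^2\\
  &=& 0.081 + 0.006 + 0.036 + 0.016 = 0.139,
\end{IEEEeqnarray*}
which is precisely the sum over paths appearing in the next fact
and agrees with the matrix-power entry $(P^3)_{0,1} = 0.139$.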
\begin{fact}
  For all $n \in \N$, $i_n \in E$, we have
  \[
    \bP[X_n = i_n] = \sum_{i_0, \ldots, i_{n-1} \in E} \alpha(i_0) p_{i_0,i_1} \cdot \ldots \cdot p_{i_{n-1}, i_n}.
  \]
\end{fact}
\begin{example}[Simple random walk on $\Z$]
  Let $E \coloneqq \Z$,
  $(\xi_n)_n$ i.i.d.~with $\bP[\xi_i = 1] = \bP[\xi_i = -1] = \frac{1}{2}$.
  Let $X_0 = 0$, $X_n = \xi_1 + \ldots + \xi_n$
  and $\alpha = \delta_0 \in M_1(\Z)$.
  Then $(X_n)_{n \ge 0}$ is a Markov chain with transition probability matrix
  \[
    p_{i,j} = \begin{cases}
      \frac{1}{2} & \text{if } |i-j| = 1,\\
      0 & \text{otherwise},
    \end{cases}
  \]
  i.e.~the doubly infinite matrix with $\frac{1}{2}$ on the two off-diagonals
  and $0$ everywhere else.
\end{example}
% \begin{example}
%   Consider a game, where a player wins or loses $1 €$ per round of the game.
%   Let $p$ be the probability of winning.
%   The player plays until they lose all money.
%   Let $X_n$ be the capital of the gambler at time $n$.
%   Define a matrix $P$
%   by $P_{0,0} = 1$, $P_{i,i+1} = p$, $P_{i+1,i} = (1-p)$
%   and all other entries $0$.
% \end{example}
\begin{definition}
  Let $E$ be a complete, separable metric space, $\alpha \in M_1(E)$.
  For every $x \in E$, let $\mathbf{P}(x, \cdot )$ be a probability measure on $E$.%
  \footnote{$\mathbf{P}(x,\cdot )$ corresponds to a row of our matrix in the discrete case.}
  Given the triplet $(E, \alpha, \{\mathbf{P}(x, \cdot )\}_{x \in E})$,
  we say that a stochastic process $(X_n)_{n \ge 0}$
  is a \vocab[Markov chain]{Markov chain taking values on $E$ %
  with starting distribution $\alpha$ %
  and transition probability $\{\mathbf{P}(x, \cdot )\}_{x \in E}$}
  if
  \begin{enumerate}[(i)]
    \item $\bP[X_0 \in \cdot ] = \alpha(\cdot )$,
    \item for all bounded, measurable $f: E \to \R$,
      \[
        \bE[f(X_{n+1}) | \cF_n] = \bE[f(X_{n+1}) | X_n] = \int_E f(y) \mathbf{P}(X_n, \dif y) \text{ a.s.},
      \]
      where $(\cF_n)_n$ denotes the canonical filtration,
      $\cF_n = \sigma(X_0, \ldots, X_n)$.
  \end{enumerate}
\end{definition}
\begin{remark}
  This agrees with the definition in the discrete case,
  as all bounded, measurable $f: E\to \R$ can be approximated by simple functions,
  i.e.~(ii) from the discrete case implies (ii) from the general definition.
\end{remark}
\begin{notation}
  If $\{\mathbf{P}(x, \cdot )\}_{x \in E}$ is a transition probability,
  define $\mathbf{P} : \cB_{\text{bdd}}(E) \to \cB_{\text{bdd}}(E)$,
  acting on bounded, measurable $f: E \to \R$,
  by
  \[
    (\mathbf{P} f)(x) \coloneqq \int_E f(y) \mathbf{P}(x, \dif y).
  \]
\end{notation}
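To see what this operator looks like in a concrete case
(an illustration, not part of the lecture):
for the simple random walk on $\Z$ from the example above,
we have $\mathbf{P}(x, \cdot ) = \frac{1}{2}\delta_{x-1} + \frac{1}{2}\delta_{x+1}$,
hence
\[
  (\mathbf{P}f)(x) = \frac{f(x-1) + f(x+1)}{2}
  \qquad \text{and} \qquad
  ((\mathbf{I} - \mathbf{P})f)(x) = f(x) - \frac{f(x-1) + f(x+1)}{2},
\]
i.e.~$\mathbf{I} - \mathbf{P}$ is, up to sign and a factor $\frac{1}{2}$,
the discrete Laplacian.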
We get the following fundamental link between martingales and Markov chains:
\begin{theorem}
  \label{martingalesandmarkovchains}
  Suppose $(E, \alpha, \{\mathbf{P}(x, \cdot )\}_{x \in E})$ is given.
  Then a stochastic process $(X_n)_{n \ge 0}$
  with $\bP[X_0 \in \cdot ] = \alpha(\cdot )$
  is a Markov chain iff for every $f: E \to \R$ bounded, measurable,
  \[
    M_n(f) \coloneqq f(X_n) - f(X_0) + \sum_{j=0}^{n-1} (\mathbf{I} - \mathbf{P})f(X_j)
  \]
  is a martingale with respect to the canonical filtration of $(X_n)$.
\end{theorem}
\begin{proof}
  $\implies$
  Fix some bounded, measurable $f : E \to \R$.
  Then, for all $n$, $M_n(f)$ is bounded and hence $M_n(f) \in L^1$.
  $M_n(f)$ is $\cF_n$-measurable for all $n \in \N_0$.
  In order to prove $\bE[M_{n+1}(f) | \cF_n] = M_n(f)$,
  it suffices to show $\bE[M_{n+1}(f) - M_n(f) | \cF_n] = 0$ a.s.
  We have
  \begin{IEEEeqnarray*}{rCl}
    \bE[M_{n+1}(f) - M_n(f) | \cF_n]
    &=& \bE[f(X_{n+1}) | \cF_n] - (\mathbf{P}f)(X_n)\\
    &\overset{\text{Markov property}}{=}& (\mathbf{P}f)(X_n) - (\mathbf{P}f)(X_n)\\
    &=& 0.
  \end{IEEEeqnarray*}
  $\impliedby$
  Suppose $(M_n(f))_n$ is a martingale for all bounded, measurable $f$.
  By the martingale property,
  \[
    0 = \bE[M_{n+1}(f) - M_n(f) | \cF_n]
    = \bE[f(X_{n+1}) | \cF_n] - (\mathbf{P}f)(X_n),
  \]
  hence
  \begin{IEEEeqnarray*}{rCl}
    \bE[f(X_{n+1}) | \cF_n] &=& (\mathbf{P}f)(X_n)
    = \int f(y) \mathbf{P}(X_n, \dif y),
  \end{IEEEeqnarray*}
  and since the right-hand side is $\sigma(X_n)$-measurable,
  it also equals $\bE[f(X_{n+1}) | X_n]$.
  This proves (ii).
\end{proof}
\begin{definition}
  Given $\{\mathbf{P}(x, \cdot )\}_{x \in E}$,
  we say that $f: E \to \R$ is \vocab{harmonic},
  iff $f(x) = (\mathbf{P}f)(x)$ for all $x \in E$.
  We call $f$ \vocab{super-harmonic}, if $(\mathbf{I} - \mathbf{P}) f \ge 0$,
  and \vocab{sub-harmonic}, if $(\mathbf{I} - \mathbf{P}) f \le 0$.
\end{definition}
\begin{corollary}
  If $f$ is bounded and (sub-/super-)harmonic,
  then for every $(E, \alpha, \{\mathbf{P}(x, \cdot )\}_{x \in E})$
  and every Markov chain $(X_n)_{n \ge 0}$,
  we have that $f(X_n)$ is a (sub-/super-)martingale.
\end{corollary}
\begin{question}
  Given a set $A \subseteq E$ and a function $f$ on $A$,
  find a function $u$ such that $u$ is harmonic on $E \setminus A$,
  i.e.~$u(x) = (\mathbf{P}u)(x)$ for all $x \notin A$,
  and $u = f$ on $A$.
\end{question}
Let $u(x) \coloneqq \bE_x[f(X_{T_A})]$,
where $\bE_x$ is the expectation with respect to the Markov chain started at $x$,
and $T_A \coloneqq \inf \{n \ge 0 : X_n \in A\}$ is the hitting time of $A$.
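As a concrete instance of this recipe
(a standard example, added here for illustration):
take the simple random walk on $\Z$, $A = \{0, N\}$ for some $N \in \N$,
and $f(0) = 0$, $f(N) = 1$,
so that $u(x) = \bE_x[f(X_{T_A})] = \bP_x[X_{T_A} = N]$.
For $0 \le x \le N$ one can check that
\[
  u(x) = \frac{x}{N}
\]
does the job: $u$ agrees with $f$ on $A$, and for $0 < x < N$,
\[
  (\mathbf{P}u)(x) = \frac{1}{2} \cdot \frac{x-1}{N} + \frac{1}{2} \cdot \frac{x+1}{N}
  = \frac{x}{N} = u(x),
\]
so $u$ is harmonic off $A$.
This is exactly the classical gambler's ruin probability;
combining harmonicity with optional stopping is the standard route
to computing such hitting probabilities.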