s23-probability-theory/inputs/lecture_22.tex

\pagebreak
\lecture{22}{2023-07-04}{Introduction Markov Chains II}
\section{Markov Chains}
\todo{Merge this with the end of lecture 21}

\begin{goal}
    We want to start with the basics of the theory of Markov chains.
\end{goal}

 \begin{example}[Markov chains with two states]
     Suppose there are two states of a phone line,
     $0$, ``free'', or $1$, ``busy''.
     We assume that the state only changes at discrete units of time
     and model this as a sequence of random variables.
     Assume
     \begin{IEEEeqnarray*}{rCl}
         \bP[X_{n+1} = 0 | X_n = 0] &=& p\\
         \bP[X_{n+1} = 1 | X_n = 0] &=& (1-p)\\
         \bP[X_{n+1} = 0 | X_n = 1] &=& q\\
         \bP[X_{n+1} = 1 | X_n = 1] &=& (1-q)
     \end{IEEEeqnarray*}
     for some $p,q \in (0,1)$.
     We can write this as a matrix
     \begin{IEEEeqnarray*}{rCl}
         P &=& \begin{pmatrix}
             p & (1-p) \\
             q & (1-q)
         \end{pmatrix}
     \end{IEEEeqnarray*}
     Note that the rows of this matrix sum up to $1$.

     Additionally, we make the following assumption:
     Given that at some time $n$, the phone is in state $i \in \{0,1\}$,
     the behavior of the phone after time $n$ does not depend
     on the way the phone reached state $i$.

     \begin{question}
         Suppose $X_0 = 0$.
         What is the probability, that the phone will be free at times
         $1 \& 2$ and will become busy at time $3$,
         i.e.~what is  $\bP[X_1 = 0, X_2 = 0, X_3 = 1]$?
     \end{question}
     We have
     \begin{IEEEeqnarray*}{rCl}
         &&\bP[X_1 = 0, X_2 = 0, X_3 = 1]\\
         &=& \bP[X_3 = 1 | X_2 = 0, X_1 = 0] \bP[X_2 = 0, X_1 = 0]\\
         &=& \bP[X_3 = 1 | X_2 = 0] \bP[X_2 = 0, X_1 = 0]\\
         &=& \bP[X_3 = 1 | X_2 = 0] \bP[X_2 = 0 |  X_1 = 0] \bP[X_1 = 0]\\
         &=& P_{0,1} P_{0,0} P_{0,0}
     \end{IEEEeqnarray*}

     \begin{question}
         Assume $X_0 = 0$. What is $\bP[X_3 = 1]$?
     \end{question}
     For $\{X_3 = 1\}$ to happen, we need to look at the following
     disjoint events:
    \begin{IEEEeqnarray*}{rCl}
        \bP(\{X_3 = 1, X_2 = 0, X_1 = 0\}) &=& P_{0,1} P_{0,0}^2,\\
        \bP(\{X_3 = 1, X_2 = 0, X_1 = 1\}) &=& P_{0,1}^2 P_{1,0},\\
        \bP(\{X_3 = 1, X_2 = 1, X_1 = 0\}) &=& P_{0,0} P_{0,1} P_{1,1},\\
        \bP(\{X_3 = 1, X_2 = 1, X_1 = 1\}) &=& P_{0,1} P_{1,1}^2.
    \end{IEEEeqnarray*}
    Hence $\bP[X_3 = 1]$ is the sum of these four probabilities.

     More generally, consider a matrix $P \in  (0,1)^{N \times N}$
     whose rows sum up to $1$.
     Then we get a Markov chain with $N$ states
     by defining
     \[\bP[X_{n+1} = j | X_{n} = i] = P_{i,j}.\]
 \end{example}

\begin{definition}
    Let $E$ denote a \vocab{discrete state space},
    usually $E = \{1,\ldots, N\}$
    or $E = \N$ or $E = \Z$.

    Let $\alpha$ be a probability measure on $E$.
    We say that $P = (p_{i,j})_{i \in  E, j \in E}$ is a
    \vocab{transition probability matrix}, if
    \[
    \forall  i,j \in E .~p_{i,j} \ge 0 \land \forall  i \in E .~\sum_{j \in E} p_{i,j} = 1.
    \]

    Given a triplet $(E, \alpha, P)$, we say that a stochastic process $(X_n)_{n \ge 0}$,
    i.e.~$X_n: \Omega \to E$, is a \vocab[Markov chain!discrete]%
    {Markov chain taking values
    on the state space $E$
    with initial distribution $\alpha$
    and transition probability matrix $P$},
    if the following conditions hold:
    \begin{enumerate}[(i)]
        \item $\bP[X_0 = i] = \alpha(i)$
            for all $i \in E$,

        \item \begin{IEEEeqnarray*}{rCl}
            &&\bP[X_{n+1} = i_{n+1} | X_0 = i_0, X_1 = i_1, \ldots, X_{n} = i_{n}]\\
            &=& \bP[X_{n+1} = i_{n+1} | X_n = i_n]\\
            &=& p_{i_n, i_{n+1}}
            \end{IEEEeqnarray*}
            for all $n \in \N_0$ and $i_0,\ldots, i_{n+1} \in E$
            (provided $\bP[X_0 = i_0, X_1 = i_1, \ldots, X_n = i_n] \neq 0$).
    \end{enumerate}

\end{definition}
\begin{fact}
    For all $n \in  \N_0$ and $i_0,\ldots,i_n \in E$, we have
    \[
    \bP[X_0 = i_0, X_1 = i_1, \ldots, X_n = i_n] =
    \alpha(i_0) \cdot p_{i_0,i_1} \cdot p_{i_1,i_2} \cdot \ldots \cdot p_{i_{n-1}, i_n}.
    \]
\end{fact}
\begin{fact}
    For all $n \in \N$, $i_n \in E$, we have
    \[
    \bP[X_n = i_n] = \sum_{i_0, \ldots, i_{n-1} \in E} \alpha(i_0) \cdot p_{i_0,i_1} \cdot \ldots \cdot p_{i_{n-1}, i_n}.
    \]
\end{fact}
\begin{example}[Simple random walk on $\Z$]
    Let $E \coloneqq \Z$, $(\xi_n)_n$ i.i.d.~with $\bP[\xi_i = 1] = \bP[\xi_i = -1] = \frac{1}{2}$.
    Let $X_0 = 0, X_n = \xi_1 + \ldots + \xi_n$.

    Let $\alpha = \delta_0 \in  M_1(\Z)$.
    Consider
    \begin{IEEEeqnarray*}{rCl}
        P &=&
        \begin{pmatrix}
            & \ddots & \ddots & \ddots &  & & & & 0\\
            \ldots & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & \ldots \\
            & \ldots & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & \ldots \\
            & & \ldots & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & \ldots \\
            0 & & & & & \ddots & \ddots & \ddots & \\
        \end{pmatrix}
    \end{IEEEeqnarray*}

\end{example}

% \begin{example}
%     Consider a game, where a player wins or loses $1 €$ per round of the game.
%     Let $p$ be the probability of winning.
%     The player plays until they lose all money.
%     Let $X_n$ be the capital of the gambler at time $n$.
%     Define a matrix $P$
%     by $P_{0,0} = 1$, $P_{i,i+1} = p$, $P_{i+1,i} = (1-p)$
%     and all other entries $0$.
% \end{example}

\begin{definition}
    Let $E$ be a complete, separable metric space,
    $\alpha \in  M_1(E)$.
    For every $x \in E$,
    let $\mathbf{P}(x, \cdot )$ be a probability measure on $E$.%
    \footnote{$\mathbf{P}(x,\cdot )$ corresponds to a row of our matrix in the discrete case}

    Given the triple $(E, \alpha, \{\mathbf{P}(x, \cdot )\}_{x \in E})$,
    we say that a stochastic process $(X_n)_{n \ge 0}$
    is a \vocab[Markov chain]{Markov chain taking values on $E$ %
        with starting distribution $\alpha$ %
        and transition probability $\{\mathbf{P}(x, \cdot )\}_{x \in E}$}
    if
    \begin{enumerate}[(i)]
        \item $\bP[X_0 \in  \cdot ] = \alpha(\cdot )$,
        \item For all bounded, measurable  $f: E \to \R$,
            \[
            \bE[f(X_{n+1}) | \cF_n] = \bE[f(X_{n+1}) | X_n]
            = \int_E f(y) \mathbf{P}(X_n, \dif y) \text{ a.s.}
            \]
    \end{enumerate}

\end{definition}
\begin{remark}
    This agrees with the definition in the discrete case,
    as all bounded, measurable $f: E\to \R$ can be approximated
    by simple functions,
    i.e.~(ii) from the discrete case implies (ii) from the general definition.
\end{remark}
\begin{notation}
    If $\{\mathbf{P}(x, \cdot )\}_{x \in E}$ is a transition probability,
    then for all $f: E \to  \R$ bounded and measurable,
    define  $\mathbf{P} : \cB_{\text{bdd}}(E) \to  \cB_{\text{bdd}}(E)$
    by
    \[
        (\mathbf{P} f)(x) \coloneqq \int_E f(y) \mathbf{P}(x, \dif y).
    \]
\end{notation}
We get the following fundamental link between martingales and Markov chains:
\begin{theorem}
    \label{martingalesandmarkovchains}
    Suppose $(E, \alpha, \{\mathbf{P}(x, \cdot )\}_{x \in E})$
    is given.
    Then a stochastic process $(X_n)_{n \ge 0}$ is a Markov chain
    iff for every $f: E \to \R$ bounded, measurable,
    \[
    M_n(f) \coloneqq f(X_n) - f(X_0) - \sum_{j=0}^{n-1} (\mathbf{P} - \mathbf{I})f(X_j)
    \]
    is a martingale
    with respect to the canonical filtration of $(X_n)$.
\end{theorem}
\begin{proof}
    $\implies$
    Fix some bounded, measurable $f : E \to \R$.
    Then, for all $n$, $M_n(f)$ is bounded
    and hence $M_n(f) \in  L^1$.
    $M_n(f)$ is $\cF_n$-measurable for all $n \in \N$.

    In order to prove $\bE[M_{n+1}(f) | \cF_n] = M_n(f)$,
    it suffices to show $\bE[M_{n+1}(f) - M_n(f) | \cF_n] = 0$ a.s.

    We have
    \begin{IEEEeqnarray*}{rCl}
        \bE[M_{n+1}(f) - M_n(f) | \cF_n]
        &=& \bE[f(X_{n+1}) | \cF_n] - (\mathbf{P}f)(X_n)\\
        &\overset{\text{Markov property}}{=}& (\mathbf{P}f)(X_n) - (\mathbf{P}f)(X_n)\\
        &=& 0
    \end{IEEEeqnarray*}

    $\impliedby$
    Suppose $(M_n(f))_n$ is a martingale for all bounded, measurable $f$.
    By the martingale property, we have
    \begin{IEEEeqnarray*}{rCl}
        \bE[f(X_{n+1}) | \cF_n]
        &=& (\mathbf{P}f)(X_n)\\
        &=& \int f(y) \mathbf{P}(X_n, \dif y)
    \end{IEEEeqnarray*}
    Since the right-hand side is $\sigma(X_n)$-measurable,
    it also equals $\bE[f(X_{n+1}) | X_n]$.
    This proves (ii).
\end{proof}

\begin{definition}
    Given $\{\mathbf{P}(x, \cdot )\}_{x \in E}$,
    we say that $f: E \to  \R$ is \vocab{harmonic},
    iff $f(x) = (\mathbf{P}f)(x)$
    for all  $x \in E$.
    We call $f$ \vocab{super-harmonic},
    if $(\mathbf{I} - \mathbf{P}) f \ge 0$
    and \vocab{sub-harmonic},
    if $(\mathbf{I} - \mathbf{P}) f \le 0$.
\end{definition}
\begin{corollary}
    If $f$ is (sub/super) harmonic, then for every
    $(E, \{\mathbf{P}(x, \cdot )\}_{x \in E}, \alpha)$
    and every Markov chain $(X_n)_{n \ge 0}$,
    we have that
    $f(X_n)$ is a (sub/super) martingale.
\end{corollary}

\begin{question}
    Given a set $A$ and a function $f$ on a superset of $A$.
    Find a function $u$, such that $u$ is harmonic,
    and $u = f$ on $A$.
\end{question}

Let $u(x) \coloneqq  \bE_x[f(X_{T_A})]$,
where $\bE_x$ is the expectation with respect to the Markov chain
starting in $x$,
and $T_A$ is the stopping time defined by the Markov chain hitting $A$.