From bdab3f3b436417b585faa68af95386d1add24cb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Ke=C3=9Fler?= <git@maximilian-kessler.de>
Date: Sun, 6 Aug 2023 11:54:57 +0200
Subject: [PATCH] implement backtracking approach

---
 download.h     |   4 +-
 game_state.h   |  15 +++++-
 game_state.hpp | 129 +++++++++++++++++++++++++++++++++++++++++++------
 main.cpp       |   9 +++-
 4 files changed, 136 insertions(+), 21 deletions(-)
diff --git a/download.h b/download.h
index cba4405..f20d034 100644
--- a/download.h
+++ b/download.h
@@ -135,7 +135,7 @@ namespace Download {
     }
 
     template <std::size_t num_suits, Hanabi::player_t num_players, std::size_t hand_size>
-    void get_game(std::variant<int, const char *> game_spec, unsigned turn) {
+    Hanabi::HanabiState<num_suits, num_players, hand_size> get_game(std::variant<int, const char *> game_spec, unsigned turn) {
         const boost::json::object game_json = [&game_spec]() {
             if (game_spec.index() == 0) {
                 return download_game_json(std::get<int>(game_spec));
@@ -150,7 +150,7 @@ namespace Download {
 
         auto game = produce_state<num_suits, num_players, hand_size>(deck, actions, turn);
         game.normalize_draw_and_positions();
-        std::cout << game << std::endl;
+        return game;
     }
 
 
diff --git a/game_state.h b/game_state.h
index 752d993..21726bc 100644
--- a/game_state.h
+++ b/game_state.h
@@ -132,8 +132,12 @@ enum class ActionType {
 
 struct BacktrackAction {
   ActionType type{};
+  // Card removed from the hand by the play or discard; handed back to revert_draw() on revert
   Card discarded{};
+  // Position in the acting player's hand of the card that was discarded or played
   std::uint8_t index{};
+  // Multiplicity reported by draw() for the replacement card, 0 if nothing was drawn (weights backtrack()'s average)
+  std::uint8_t multiplicity{};
 };
 
 template <std::size_t num_suits, player_t num_players, std::size_t hand_size>
@@ -142,6 +146,8 @@ public:
     HanabiState() = default;
     explicit HanabiState(const std::vector<Card>& deck);
 
+    double backtrack();
+
     BacktrackAction clue();
 
     /**
@@ -158,7 +164,7 @@ public:
 
     void revert(const BacktrackAction &action);
 
-    void draw(std::uint8_t index);
+    uint8_t draw(uint8_t index);
 
     void revert_draw(std::uint8_t index, Card card);
 
@@ -166,16 +172,21 @@ public:
 
     void decr_turn();
 
+    bool is_trash(const Card& card) const;
+    bool is_playable(const Card& card) const;
+
     player_t _turn{};
     clue_t _num_clues{};
-    std::uint8_t _draw_pile_size{};
+    std::uint8_t _weighted_draw_pile_size{};
     Stacks<num_suits> _stacks{};
     std::array<std::array<Card, hand_size>, num_players> _hands{};
     CardArray<num_suits, player_t> _card_positions{};
     std::list<CardMultiplicity> _draw_pile{};
+    std::int8_t endgame_turns_left{-1};  // counts down once draw() empties the pile; -1 while no countdown is running (signed so the `!= -1` checks work)
 
     // further statistics that we might want to keep track of
     uint8_t _pace{};
+    uint8_t _score{};
 
     auto operator<=>(const HanabiState &) const = default;
 };
diff --git a/game_state.hpp b/game_state.hpp
index 871f129..6faddc2 100644
--- a/game_state.hpp
+++ b/game_state.hpp
@@ -61,11 +61,13 @@ namespace Hanabi {
     HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> &deck):
     _turn(0),
     _num_clues(max_num_clues),
-    _draw_pile_size(deck.size() - num_players * hand_size),
+    _weighted_draw_pile_size(deck.size() - num_players * hand_size),
     _stacks(),
     _hands(),
     _card_positions(draw_pile),
-    _draw_pile() {
+    _draw_pile(),
+    _pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)),
+    _score(0) {
         std::ranges::fill(_stacks, starting_card_rank);
         for(const Card& card: deck) {
             _draw_pile.push_back({card, 1});
@@ -92,11 +94,27 @@ namespace Hanabi {
     template<size_t num_suits, player_t num_players, size_t hand_size>
     void HanabiState<num_suits, num_players, hand_size>::incr_turn() {
         _turn = (_turn + 1) % num_players;
+        if (endgame_turns_left != static_cast<decltype(endgame_turns_left)>(-1)) {  // -1 in the member's own type marks "no endgame countdown running"; a bare -1 never equals an unsigned member
+            endgame_turns_left--;
+        }
     }
 
     template<size_t num_suits, player_t num_players, size_t hand_size>
     void HanabiState<num_suits, num_players, hand_size>::decr_turn() {
         _turn = (_turn + num_players - 1) % num_players;
+        if (endgame_turns_left != static_cast<decltype(endgame_turns_left)>(-1)) {  // mirror of incr_turn(): leave the "no countdown" marker (-1 in the member's type) untouched
+            endgame_turns_left++;
+        }
+    }
+
+    template<size_t num_suits, player_t num_players, size_t hand_size>
+    bool HanabiState<num_suits, num_players, hand_size>::is_playable(const Hanabi::Card &card) const {
+        return _stacks[card.suit] - 1 == card.rank;  // play() decrements the stack, so the next card is one rank below it
+    }
+
+    template<size_t num_suits, player_t num_players, size_t hand_size>
+    bool HanabiState<num_suits, num_players, hand_size>::is_trash(const Hanabi::Card &card) const {
+        return _stacks[card.suit] <= card.rank;  // ranks at or above the stack value can no longer be played
     }
 
     template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
@@ -107,31 +125,32 @@ namespace Hanabi {
         assert(card.rank == _stacks[card.suit] - 1);
 
         --_stacks[card.suit];
+        _score++;
 
-        BacktrackAction ret{ActionType::play, _hands[_turn][index], index};
+        BacktrackAction ret{ActionType::play, _hands[_turn][index], index, 0};
 
         if (card.rank == 0) {
             // update clues if we played the last card of a stack
             _num_clues++;
         }
 
-        draw(index);
+        ret.multiplicity = draw(index);
         incr_turn();
 
         return ret;
     }
 
     template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
-    BacktrackAction HanabiState<num_suits, num_players, hand_size>::discard(
-            std::uint8_t index) {
+    BacktrackAction HanabiState<num_suits, num_players, hand_size>::discard(std::uint8_t index) {
         assert(index < _hands[_turn].size());
         assert(_num_clues != max_num_clues);
 
         _num_clues++;
+        _pace--;
 
         BacktrackAction ret{ActionType::discard, _hands[_turn][index], index};
 
-        draw(index);
+        ret.multiplicity = draw(index);
         incr_turn();
 
         return ret;
@@ -171,7 +190,7 @@ namespace Hanabi {
     }
 
     template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
-    void HanabiState<num_suits, num_players, hand_size>::draw(std::uint8_t index) {
+    std::uint8_t HanabiState<num_suits, num_players, hand_size>::draw(uint8_t index) {
         assert(index < _hands[_turn].size());
 
         const Card& discarded = _hands[_turn][index];
@@ -181,24 +200,36 @@ namespace Hanabi {
 
         // draw a new card if the draw pile is not empty
         if (!_draw_pile.empty()) {
-            --_draw_pile_size;
-            CardMultiplicity draw = _draw_pile.front();
+            --_weighted_draw_pile_size;
+
+            const CardMultiplicity draw = _draw_pile.front();
             _draw_pile.pop_front();
             assert(draw.multiplicity > 0);
+
             if (draw.multiplicity > 1) {
-                draw.multiplicity--;
                 _draw_pile.push_back(draw);
+                _draw_pile.back().multiplicity--;
             }
-            draw.card.copy = draw.multiplicity - 1;
-            _hands[_turn][index] = draw.card;
+
+            Card& card_in_hand = _hands[_turn][index];
+            card_in_hand = draw.card;
+            card_in_hand.copy = draw.multiplicity - 1;
+
             if (_stacks[draw.card.suit] > draw.card.rank) {
-                _card_positions[draw.card] = _turn;
+                _card_positions[card_in_hand] = _turn;
             }
+
+            if(_draw_pile.empty()) {
+                endgame_turns_left = num_players;
+            }
+            return draw.multiplicity;
         }
+        return 0;
     }
 
     template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
     void HanabiState<num_suits, num_players, hand_size>::revert_draw(std::uint8_t index, Card card) {
+        endgame_turns_left = -1;
         assert(index < _hands[_turn].size());
         const Card& discarded = _hands[_turn][index];
         if (_stacks[discarded.suit] > discarded.rank) {
@@ -216,7 +247,7 @@ namespace Hanabi {
         if (_stacks[card.suit] > card.rank) {
             _card_positions[card] = _turn;
         }
-        _draw_pile_size++;
+        _weighted_draw_pile_size++;
     }
 
     template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
@@ -277,6 +308,7 @@ namespace Hanabi {
             case ActionType::discard:
                 assert(_num_clues > 0);
                 _num_clues--;
+                _pace++;
                 revert_draw(action.index, action.discarded);
                 break;
             case ActionType::play:
@@ -285,9 +317,76 @@ namespace Hanabi {
                 }
                 revert_draw(action.index, action.discarded);
                 _stacks[action.discarded.suit]++;
+                _score--;
             default:
                 break;
         }
     }
 
+    /**
+     * Exhaustive search for the best achievable probability of reaching the maximum score
+     * (5 cards per suit) from the current state. Each candidate action is applied in place
+     * and reverted afterwards. Plays and discards are averaged over every draw pile entry,
+     * weighted by its multiplicity; the search stops as soon as some action wins for sure.
+     * NOTE(review): with an empty draw pile no play or discard is evaluated. This matches the
+     * previous behaviour (it divided by the pile weight and std::max dropped the NaN), but
+     * endgame plays presumably still need support here and in revert_draw() -- confirm.
+     * @return probability of winning under the best line of play that was found
+     */
+    template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
+    double HanabiState<num_suits, num_players, hand_size>::backtrack() {
+        std::cout << *this << std::endl;
+        if (_score == 5 * num_suits) {
+            return 1;
+        }
+        if (endgame_turns_left == 0) {  // the former `_pace < 0` test was dead: _pace is unsigned
+            return 0;
+        }
+
+        double best_probability = 0;
+        // Work on a copy: the actions below temporarily change _hands and _turn.
+        const std::array<Card, hand_size> hand = _hands[_turn];
+        const bool can_draw = !_draw_pile.empty();
+        // Folds a candidate into the running best; returns true once a win is certain.
+        const auto record = [&best_probability](const double candidate) {
+            best_probability = std::max(best_probability, candidate);
+            return best_probability == 1;
+        };
+        // Applies perform() once per draw pile entry; returns the weighted average outcome.
+        const auto average_over_draws = [this](const auto &perform) {
+            double weighted_sum = 0;
+            for (std::size_t i = 0; i < _draw_pile.size(); ++i) {
+                const BacktrackAction action = perform();
+                weighted_sum += backtrack() * action.multiplicity;
+                revert(action);
+            }
+            return weighted_sum / _weighted_draw_pile_size;
+        };
+
+        // First option: play any card that is currently playable.
+        for (std::uint8_t index = 0; can_draw && index < hand_size; ++index) {
+            if (is_playable(hand[index])
+                && record(average_over_draws([this, index] { return play(index); }))) {
+                return best_probability;
+            }
+        }
+        // Second option: discard the first trash card; discard() asserts clues are below the maximum.
+        const bool may_discard = can_draw && _pace > 0 && _num_clues != max_num_clues;
+        for (std::uint8_t index = 0; may_discard && index < hand_size; ++index) {
+            if (is_trash(hand[index])) {
+                if (record(average_over_draws([this, index] { return discard(index); }))) {
+                    return best_probability;
+                }
+                break;  // all discards are equivalent, no need to try other trash cards
+            }
+        }
+        // Last option: stall with a clue.
+        if (_num_clues > 0) {
+            const BacktrackAction action = clue();
+            record(backtrack());
+            revert(action);
+        }
+        return best_probability;
+    }
+
 } // namespace Hanabi
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index 79cc03a..e4f72a2 100644
--- a/main.cpp
+++ b/main.cpp
@@ -22,7 +22,7 @@ void test_game() {
   state._hands[0] = {y0, y1, y2, r0, r1};
   state._hands[1] = {r1, r1, y1, r3, r2};
   state._card_positions[r1] = 0;
-  state._draw_pile_size = 1;
+  state._weighted_draw_pile_size = 1;
 
   auto state2 = state;
 
@@ -38,7 +38,12 @@ void test_game() {
   assert(state == state2);
 }
 
-void download() { Download::get_game<6,3,5>("1004116.json", 40); }
+void download() {
+    auto state = Download::get_game<6,3,5>("1004116.json", 40);
+    std::cout << state << std::endl;
+    const double win_probability = state.backtrack();  // best winning chance found by the search
+    std::cout << win_probability << std::endl;
+}
 
 void print_sizes() {
   std::cout << "size of card -> hand map: " << sizeof(HanabiState<5, 3, 4>)