implement backtracking approach

2023-08-06 11:54:57 +02:00 · 2023-08-06 11:54:57 +02:00 · bdab3f3b43
commit bdab3f3b43
parent b690f43a73
4 changed files with 136 additions and 21 deletions
--- a/download.h
+++ b/download.h
@ -135,7 +135,7 @@ namespace Download {
    }

    template <std::size_t num_suits, Hanabi::player_t num_players, std::size_t hand_size>
-    void get_game(std::variant<int, const char *> game_spec, unsigned turn) {
+    Hanabi::HanabiState<num_suits, num_players, hand_size> get_game(std::variant<int, const char *> game_spec, unsigned turn) {
        const boost::json::object game_json = [&game_spec]() {
            if (game_spec.index() == 0) {
                return download_game_json(std::get<int>(game_spec));
@ -150,7 +150,7 @@ namespace Download {

        auto game = produce_state<num_suits, num_players, hand_size>(deck, actions, turn);
        game.normalize_draw_and_positions();
-        std::cout << game << std::endl;
+        return game;
    }


--- a/game_state.h
+++ b/game_state.h
@ -132,8 +132,12 @@ enum class ActionType {

 struct BacktrackAction {
+  // Record of one performed action: clue(), play() and discard() return it,
+  // and revert() takes it to undo that action.
  ActionType type{};
+  // The card that was discarded or played
  Card discarded{};
+  // Index of card in hand that was discarded or played
  std::uint8_t index{};
+  // Multiplicity of the replacement card as returned by draw() (0 when no card
+  // was drawn); backtrack() uses it to weight the outcome of that draw.
+  std::uint8_t multiplicity{};
 };

 template <std::size_t num_suits, player_t num_players, std::size_t hand_size>
@ -142,6 +146,8 @@ public:
    HanabiState() = default;
    explicit HanabiState(const std::vector<Card>& deck);

+    double backtrack();
+
    BacktrackAction clue();

    /**
@ -158,7 +164,7 @@ public:

    void revert(const BacktrackAction &action);

-    void draw(std::uint8_t index);
+    uint8_t draw(uint8_t index);

    void revert_draw(std::uint8_t index, Card card);

@ -166,16 +172,21 @@ public:

    void decr_turn();

+    bool is_trash(const Card& card) const;
+    bool is_playable(const Card& card) const;
+
    player_t _turn{};
    clue_t _num_clues{};
-    std::uint8_t _draw_pile_size{};
+    std::uint8_t _weighted_draw_pile_size{};
    Stacks<num_suits> _stacks{};
    std::array<std::array<Card, hand_size>, num_players> _hands{};
    CardArray<num_suits, player_t> _card_positions{};
    std::list<CardMultiplicity> _draw_pile{};
+    std::uint8_t endgame_turns_left;

    // further statistics that we might want to keep track of
    uint8_t _pace{};
+    uint8_t _score{};

    auto operator<=>(const HanabiState &) const = default;
 };
--- a/game_state.hpp
+++ b/game_state.hpp
@ -61,11 +61,13 @@ namespace Hanabi {
    HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> &deck):
    _turn(0),
    _num_clues(max_num_clues),
-    _draw_pile_size(deck.size() - num_players * hand_size),
+    _weighted_draw_pile_size(deck.size() - num_players * hand_size),
    _stacks(),
    _hands(),
    _card_positions(draw_pile),
-    _draw_pile() {
+    _draw_pile(),
+    _pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)),
+    _score(0) {
        std::ranges::fill(_stacks, starting_card_rank);
        for(const Card& card: deck) {
            _draw_pile.push_back({card, 1});
@ -92,11 +94,27 @@ namespace Hanabi {
    template<size_t num_suits, player_t num_players, size_t hand_size>
    void HanabiState<num_suits, num_players, hand_size>::incr_turn() {
+        // Hand the turn to the next player, wrapping around after the last one.
        _turn = (_turn + 1) % num_players;
+        // revert_draw() stores -1 in endgame_turns_left, apparently as the
+        // "no countdown running" marker. The marker has to be converted to the
+        // member's own type before comparing: for an unsigned 8-bit member the
+        // stored value is 255, which integer promotion never makes equal to the
+        // int -1, so a plain `!= -1` is always true.
+        constexpr auto no_countdown = static_cast<decltype(endgame_turns_left)>(-1);
+        if (endgame_turns_left != no_countdown) {
+            endgame_turns_left--;
+        }
    }

    template<size_t num_suits, player_t num_players, size_t hand_size>
    void HanabiState<num_suits, num_players, hand_size>::decr_turn() {
+        // Hand the turn back to the previous player; adding num_players first
+        // keeps the value non-negative before the modulo.
        _turn = (_turn + num_players - 1) % num_players;
+        // revert_draw() stores -1 in endgame_turns_left, apparently as the
+        // "no countdown running" marker. Compare against that marker in the
+        // member's own type: with an unsigned 8-bit member a plain `!= -1` is
+        // always true, and the increment would then wrap the marker (255) to 0,
+        // which backtrack() treats as "no turns left".
+        constexpr auto no_countdown = static_cast<decltype(endgame_turns_left)>(-1);
+        if (endgame_turns_left != no_countdown) {
+            endgame_turns_left++;
+        }
+    }
+
+    /**
+     * Tells whether `card` can be played right now, i.e. whether its rank is
+     * exactly one below the current value of its suit's stack (the same
+     * condition play() asserts before lowering the stack).
+     */
+    template<size_t num_suits, player_t num_players, size_t hand_size>
+    bool HanabiState<num_suits, num_players, hand_size>::is_playable(const Hanabi::Card &card) const {
+        const auto next_rank = _stacks[card.suit] - 1;
+        return card.rank == next_rank;
+    }
+
+    /**
+     * Tells whether `card` is no longer needed: its rank is not below the
+     * current value of its suit's stack. play() lowers that value with every
+     * card played, so such a rank has already been reached.
+     */
+    template<size_t num_suits, player_t num_players, size_t hand_size>
+    bool HanabiState<num_suits, num_players, hand_size>::is_trash(const Hanabi::Card &card) const {
+        const auto &stack_value = _stacks[card.suit];
+        return card.rank >= stack_value;
    }

    template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
@ -107,31 +125,32 @@ namespace Hanabi {
        assert(card.rank == _stacks[card.suit] - 1);

        --_stacks[card.suit];
+        _score++;

-        BacktrackAction ret{ActionType::play, _hands[_turn][index], index};
+        BacktrackAction ret{ActionType::play, _hands[_turn][index], index, 0};

        if (card.rank == 0) {
            // update clues if we played the last card of a stack
            _num_clues++;
        }

-        draw(index);
+        ret.multiplicity = draw(index);
        incr_turn();

        return ret;
    }

    template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
-    BacktrackAction HanabiState<num_suits, num_players, hand_size>::discard(
-            std::uint8_t index) {
+    BacktrackAction HanabiState<num_suits, num_players, hand_size>::discard(std::uint8_t index) {
        assert(index < _hands[_turn].size());
        assert(_num_clues != max_num_clues);

        _num_clues++;
+        _pace--;

        BacktrackAction ret{ActionType::discard, _hands[_turn][index], index};

-        draw(index);
+        ret.multiplicity = draw(index);
        incr_turn();

        return ret;
@ -171,7 +190,7 @@ namespace Hanabi {
    }

    template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
-    void HanabiState<num_suits, num_players, hand_size>::draw(std::uint8_t index) {
+    std::uint8_t HanabiState<num_suits, num_players, hand_size>::draw(uint8_t index) {
        assert(index < _hands[_turn].size());

        const Card& discarded = _hands[_turn][index];
@ -181,24 +200,36 @@ namespace Hanabi {

        // draw a new card if the draw pile is not empty
        if (!_draw_pile.empty()) {
-            --_draw_pile_size;
-            CardMultiplicity draw = _draw_pile.front();
+            --_weighted_draw_pile_size;
+
+            const CardMultiplicity draw = _draw_pile.front();
            _draw_pile.pop_front();
            assert(draw.multiplicity > 0);
+
            if (draw.multiplicity > 1) {
-                draw.multiplicity--;
                _draw_pile.push_back(draw);
+                _draw_pile.back().multiplicity--;
            }
-            draw.card.copy = draw.multiplicity - 1;
-            _hands[_turn][index] = draw.card;
+
+            Card& card_in_hand = _hands[_turn][index];
+            card_in_hand = draw.card;
+            card_in_hand.copy = draw.multiplicity - 1;
+
            if (_stacks[draw.card.suit] > draw.card.rank) {
-                _card_positions[draw.card] = _turn;
+                _card_positions[card_in_hand] = _turn;
            }
+
+            if(_draw_pile.empty()) {
+                endgame_turns_left = num_players;
+            }
+            return draw.multiplicity;
        }
+        return 0;
    }

    template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
    void HanabiState<num_suits, num_players, hand_size>::revert_draw(std::uint8_t index, Card card) {
+        endgame_turns_left = -1;
        assert(index < _hands[_turn].size());
        const Card& discarded = _hands[_turn][index];
        if (_stacks[discarded.suit] > discarded.rank) {
@ -216,7 +247,7 @@ namespace Hanabi {
        if (_stacks[card.suit] > card.rank) {
            _card_positions[card] = _turn;
        }
-        _draw_pile_size++;
+        _weighted_draw_pile_size++;
    }

    template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
@ -277,6 +308,7 @@ namespace Hanabi {
            case ActionType::discard:
                assert(_num_clues > 0);
                _num_clues--;
+                _pace++;
                revert_draw(action.index, action.discarded);
                break;
            case ActionType::play:
@ -285,9 +317,76 @@ namespace Hanabi {
                }
                revert_draw(action.index, action.discarded);
                _stacks[action.discarded.suit]++;
+                _score--;
            default:
                break;
        }
    }

+    // Folds a candidate win probability into the local `best_probability` of the
+    // enclosing function and returns from that function as soon as a certain
+    // win (probability 1) has been found. Wrapped in do { } while (0) so that
+    // `UPDATE_PROBABILITY(x);` expands to exactly one statement.
+    #define UPDATE_PROBABILITY(new_probability) \
+        do { \
+            best_probability = std::max(best_probability, (new_probability)); \
+            if (best_probability == 1) { \
+                return best_probability; \
+            } \
+        } while (0)
+
+    /**
+     * Exhaustive search from the current state: tries every playable card, one
+     * trash discard and a stall clue for the player to move, recursing after
+     * each action and reverting it afterwards.
+     *
+     * For an action that draws a card, every entry of the draw pile is tried
+     * and the results are averaged, weighted by the entry's multiplicity.
+     *
+     * @return the best probability found of reaching the score 5 * num_suits
+     */
+    template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
+    double HanabiState<num_suits, num_players, hand_size>::backtrack() {
+        // Trace output: prints every state the search visits.
+        std::cout << *this << std::endl;
+        if (_score == 5 * num_suits) {
+            return 1;
+        }
+        // NOTE(review): _pace is declared as uint8_t, so `_pace < 0` can never be
+        // true as written; it only becomes effective if _pace is made signed.
+        if(_pace < 0 || endgame_turns_left == 0) {
+            return 0;
+        }
+
+        // TODO: Have some endgame analysis here?
+
+        // Win probability of one play/discard, given a callable that performs
+        // the action and returns its BacktrackAction.
+        const auto evaluate = [this](const auto &perform_action) -> double {
+            if (_draw_pile.empty()) {
+                // No card can be drawn, so the action has a single outcome.
+                // Averaging over the (empty) pile would compute 0 / 0 = NaN, and
+                // the action would silently be ignored.
+                // NOTE(review): assumes revert() correctly undoes an action
+                // that drew no card - TODO confirm.
+                const BacktrackAction action = perform_action();
+                const double probability = backtrack();
+                revert(action);
+                return probability;
+            }
+            double sum_of_probabilities = 0;
+            // One iteration per draw pile entry; each iteration draws the
+            // entry currently at the front of the pile.
+            for (size_t i = 0; i < _draw_pile.size(); i++) {
+                const BacktrackAction action = perform_action();
+                sum_of_probabilities += backtrack() * action.multiplicity;
+                revert(action);
+            }
+            return sum_of_probabilities / _weighted_draw_pile_size;
+        };
+
+        double best_probability = 0;
+        // Snapshot of the hand of the player to move, taken before any action
+        // is tried.
+        const std::array<Card, hand_size> hand = _hands[_turn];
+
+        // First, check for playables
+        for(std::uint8_t index = 0; index < hand_size; index++) {
+            if(is_playable(hand[index])) {
+                const double probability_for_this_play =
+                        evaluate([this, index] { return play(index); });
+                UPDATE_PROBABILITY(probability_for_this_play);
+            }
+        }
+
+        // Check for discards now
+        if(_pace > 0) {
+            for(std::uint8_t index = 0; index < hand_size; index++) {
+                if (is_trash(hand[index])) {
+                    const double probability_discard =
+                            evaluate([this, index] { return discard(index); });
+                    UPDATE_PROBABILITY(probability_discard);
+
+                    // All discards are equivalent, do not continue searching for different trash
+                    break;
+                }
+            }
+        }
+
+        // Last option is to stall
+        if(_num_clues > 0) {
+            BacktrackAction action = clue();
+            const double probability_stall = backtrack();
+            revert(action);
+            UPDATE_PROBABILITY(probability_stall);
+        }
+
+        return best_probability;
+    }
+
 } // namespace Hanabi
--- a/main.cpp
+++ b/main.cpp
@ -22,7 +22,7 @@ void test_game() {
  state._hands[0] = {y0, y1, y2, r0, r1};
  state._hands[1] = {r1, r1, y1, r3, r2};
  state._card_positions[r1] = 0;
-  state._draw_pile_size = 1;
+  state._weighted_draw_pile_size = 1;

  auto state2 = state;

@ -38,7 +38,12 @@ void test_game() {
  assert(state == state2);
 }

-void download() { Download::get_game<6,3,5>("1004116.json", 40); }
+// Loads the game from "1004116.json" at turn 40, prints that state, runs the
+// backtracking search on it and prints the resulting probability.
+void download() {
+    auto state = Download::get_game<6,3,5>("1004116.json", 40);
+    std::cout << state << std::endl;
+    const double win_probability = state.backtrack();
+    std::cout << win_probability << std::endl;
+}

 void print_sizes() {
  std::cout << "size of card -> hand map: " << sizeof(HanabiState<5, 3, 4>)