keep track of probabilities in tablebase

2023-08-07 12:48:25 +02:00 · 2023-08-07 12:48:25 +02:00 · c394338c24
commit c394338c24
parent f29e3d1202
3 changed files with 111 additions and 53 deletions
--- a/game_state.h
+++ b/game_state.h
@ -5,6 +5,7 @@
 #include <cstdint>
 #include <algorithm>
 #include <cstddef>
+#include <unordered_map>
 #include <bitset>
 #include <limits>
 #include <optional>
@ -162,6 +163,7 @@ public:
    [[nodiscard]] virtual bool is_playable(const Card& card) const = 0;

    [[nodiscard]] virtual std::uint64_t enumerated_states() const = 0;
+    [[nodiscard]] virtual std::unordered_map<unsigned long, double> visited_states() const = 0;

    virtual void normalize_draw_and_positions() = 0;

@ -194,6 +196,7 @@ public:
    [[nodiscard]] bool is_playable(const Card& card) const final;

    [[nodiscard]] std::uint64_t enumerated_states() const final;
+    [[nodiscard]] std::unordered_map<unsigned long, double> visited_states() const final;

    void normalize_draw_and_positions() final;

@ -232,13 +235,17 @@ private:

    // This will indicate whether cards that were in hands initially still are in hands
    std::bitset<num_players * hand_size> _card_positions_hands;
-    uint8_t _num_useful_cards_in_starting_hands;
+
+    size_t _num_useful_cards_in_starting_hands;
+    size_t _initial_draw_pile_size;

    // further statistics that we might want to keep track of
    uint8_t _pace{};
    uint8_t _score{};

    std::uint64_t _enumerated_states {};
+
+    std::unordered_map<unsigned long, double> _position_tablebase;
 };

 template <std::size_t num_suits, player_t num_players, std::size_t hand_size>
--- a/game_state.hpp
+++ b/game_state.hpp
@ -73,6 +73,7 @@ namespace Hanabi {
            _card_positions_draw(),
            _card_positions_hands(),
            _num_useful_cards_in_starting_hands(0),
+            _initial_draw_pile_size(0),
            _pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)),
            _score(0),
            _enumerated_states(0) {
@ -301,7 +302,8 @@ namespace Hanabi {
                ASSERT(_card_positions_hands[discarded_card.local_index] == false);
                _card_positions_hands[discarded_card.local_index] = true;
            } else {
-                auto hand_card_it = std::ranges::find(_card_positions_draw[discarded_card.local_index], trash_or_play_stack);
+                auto hand_card_it = std::ranges::find(_card_positions_draw[discarded_card.local_index],
+                                                      trash_or_play_stack);
                ASSERT(hand_card_it != _card_positions_draw[discarded_card.local_index].end());
                *hand_card_it = _turn;
            }
@ -344,6 +346,7 @@ namespace Hanabi {
                }
            }
        }
+        _initial_draw_pile_size = _weighted_draw_pile_size;

        // Prepare cards in hands
        for (player_t player = 0; player < num_players; player++) {
@ -372,7 +375,8 @@ namespace Hanabi {
    }

    template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
-    void HanabiState<num_suits, num_players, hand_size>::revert_play(const BacktrackAction& action, bool was_on_8_clues) {
+    void
+    HanabiState<num_suits, num_players, hand_size>::revert_play(const BacktrackAction &action, bool was_on_8_clues) {
        ASSERT(!was_on_8_clues or _num_clues == 8);
        decr_turn();
        if (action.discarded.rank == 0 and not was_on_8_clues) {
@ -399,15 +403,26 @@ namespace Hanabi {
        _num_clues++;
    }

+    // Stores best_probability for the current state in _position_tablebase and returns it from the
+    // enclosing function. The expansion site must have locals `id_of_state` and `best_probability`.
+    // If the state already has an entry, that entry has to equal best_probability (checked by ASSERT).
+    // One try_emplace performs lookup and insertion together, so the key is hashed once rather than
+    // once for contains() and again for operator[].
+    // Keep comments outside the macro body: a `//` comment placed before a line continuation would
+    // swallow the following line.
+    #define RETURN_PROBABILITY                                                  \
+    {                                                                           \
+        [[maybe_unused]] const auto [entry, inserted] =                         \
+                _position_tablebase.try_emplace(id_of_state, best_probability); \
+        ASSERT(inserted or entry->second == best_probability);                  \
+    }                                                                           \
+    return best_probability;
+
+
+    // Raises best_probability to new_probability when that is larger. As soon as best_probability
+    // equals 1 nothing can improve on it, so the result is recorded and the enclosing function returns.
     #define UPDATE_PROBABILITY(new_probability)                     \
     best_probability = std::max(best_probability, new_probability); \
     if (best_probability == 1) {                                    \
-            return best_probability; \
+        RETURN_PROBABILITY;                                         \
     }

    template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
    double HanabiState<num_suits, num_players, hand_size>::backtrack(size_t depth) {
        _enumerated_states++;
+        const unsigned long id_of_state = unique_id();
+
        if (_score == 5 * num_suits) {
            return 1;
        }
@ -485,13 +500,48 @@ namespace Hanabi {
            UPDATE_PROBABILITY(probability_stall);
        }

+        _position_tablebase[id_of_state] = best_probability;
        return best_probability;
    }

    template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
    std::uint64_t HanabiState<num_suits, num_players, hand_size>::unique_id() const {
-        unsigned long id = _card_positions_hands.to_ulong();
+        unsigned long id = 0;
+
+        // encode all positions of cards that started in draw pile
+        for(const auto & positions: _card_positions_draw) {
+            for(player_t player : positions) {
+                id *= num_players + 2;
+                id += player;
+            }
+        }
+
+        // encode number of clues
+        id *= max_num_clues + 1;
+        id += _num_clues;
+
+        // encode draw pile size
+        id *= _initial_draw_pile_size;
+        id += _weighted_draw_pile_size;
+
+        // encode positions of cards that started in hands
+        id = id << _num_useful_cards_in_starting_hands;
+        id += _card_positions_hands.to_ulong();
+
+        id *= num_players;
+        id += _turn;
+
+        // The id is unique now, since for all relevant cards, we know their position (including if they are played),
+        // the number of clues, the draw pile size and whose turn it is.
+        // This already uniquely determines the current players position, assuming that we never discard good cards
+        // (and only play them)
+
        return id;
    }

+    /// Returns the table mapping state ids to the probability stored for that state.
+    /// The map is returned by value, so every call copies the whole table.
+    template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
+    auto HanabiState<num_suits, num_players, hand_size>::visited_states() const
+            -> std::unordered_map<unsigned long, double> {
+        return this->_position_tablebase;
+    }
+
 } // namespace Hanabi
--- a/main.cpp
+++ b/main.cpp
@ -21,6 +21,7 @@ namespace Hanabi {
        std::cout.precision(10);
        std::cout << "Probability with optimal play: " << res << std::endl;
        std::cout << "Enumerated " << game->enumerated_states() << " states" << std::endl;
+        std::cout << "Visited " << game->visited_states().size() << " unique game states. " << std::endl;
    }

    void print_sizes() {