From c394338c248bda6685b8a020fb7010e2e40b2cf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20Ke=C3=9Fler?= Date: Mon, 7 Aug 2023 12:48:25 +0200 Subject: [PATCH] keep track of probabilities in tablebase --- game_state.h | 9 ++- game_state.hpp | 154 ++++++++++++++++++++++++++++++++----------------- main.cpp | 1 + 3 files changed, 111 insertions(+), 53 deletions(-) diff --git a/game_state.h b/game_state.h index 4138559..350924c 100644 --- a/game_state.h +++ b/game_state.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -162,6 +163,7 @@ public: [[nodiscard]] virtual bool is_playable(const Card& card) const = 0; [[nodiscard]] virtual std::uint64_t enumerated_states() const = 0; + [[nodiscard]] virtual std::unordered_map visited_states() const = 0; virtual void normalize_draw_and_positions() = 0; @@ -194,6 +196,7 @@ public: [[nodiscard]] bool is_playable(const Card& card) const final; [[nodiscard]] std::uint64_t enumerated_states() const final; + [[nodiscard]] std::unordered_map visited_states() const final; void normalize_draw_and_positions() final; @@ -232,13 +235,17 @@ private: // This will indicate whether cards that were in hands initially still are in hands std::bitset _card_positions_hands; - uint8_t _num_useful_cards_in_starting_hands; + + size_t _num_useful_cards_in_starting_hands; + size_t _initial_draw_pile_size; // further statistics that we might want to keep track of uint8_t _pace{}; uint8_t _score{}; std::uint64_t _enumerated_states {}; + + std::unordered_map _position_tablebase; }; template diff --git a/game_state.hpp b/game_state.hpp index 5acc4ad..01b01bb 100644 --- a/game_state.hpp +++ b/game_state.hpp @@ -7,7 +7,7 @@ namespace Hanabi { - std::ostream& operator<<(std::ostream& os, HanabiStateIF const& hanabi_state) { + std::ostream &operator<<(std::ostream &os, HanabiStateIF const &hanabi_state) { hanabi_state.print(os); return os; } @@ -29,7 +29,7 @@ namespace Hanabi { } template - std::ostream& operator<<(std::ostream &os, const Stacks &stacks) { + std::ostream &operator<<(std::ostream &os, const Stacks &stacks) { for (size_t i = 0; i < stacks.size() - 1; i++) { os << starting_card_rank - stacks[i] << ", "; } @@ -52,36 +52,37 @@ namespace Hanabi { } template - const T& CardArray::operator[](const Card &card) const { + const T &CardArray::operator[](const Card &card) const { return _array[card.suit][card.rank]; }; template - T& CardArray::operator[](const Card &card) { + T &CardArray::operator[](const Card &card) { return _array[card.suit][card.rank]; }; template HanabiState::HanabiState(const std::vector &deck): - _turn(0), - _num_clues(max_num_clues), - _weighted_draw_pile_size(deck.size()), - _stacks(), - _hands(), - _draw_pile(), - _endgame_turns_left(no_endgame), - _card_positions_draw(), - _card_positions_hands(), - _num_useful_cards_in_starting_hands(0), - _pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)), - _score(0), - _enumerated_states(0) { + _turn(0), + _num_clues(max_num_clues), + _weighted_draw_pile_size(deck.size()), + _stacks(), + _hands(), + _draw_pile(), + _endgame_turns_left(no_endgame), + _card_positions_draw(), + _card_positions_hands(), + _num_useful_cards_in_starting_hands(0), + _initial_draw_pile_size(0), + _pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)), + _score(0), + _enumerated_states(0) { std::ranges::fill(_stacks, starting_card_rank); - for(const Card& card: deck) { + for (const Card &card: deck) { _draw_pile.push_back({card, 1}); } - for(player_t player = 0; player < num_players; player++) { - for(std::uint8_t index = 0; index < hand_size; index++) { + for (player_t player = 0; player < num_players; player++) { + for (std::uint8_t index = 0; index < hand_size; index++) { draw(index); } incr_turn(); @@ -100,7 +101,7 @@ namespace Hanabi { template void HanabiState::incr_turn() { _turn = (_turn + 1) % num_players; - if(_endgame_turns_left != no_endgame) { + if (_endgame_turns_left != no_endgame) { _endgame_turns_left--; } } @@ -184,12 +185,12 @@ namespace Hanabi { template std::uint8_t HanabiState::find_card_in_hand( const Hanabi::Card &card) const { - for(std::uint8_t i = 0; i < hand_size; i++) { - if(_hands[_turn][i].rank == card.rank && _hands[_turn][i].suit == card.suit) { - return i; - } - } - return -1; + for (std::uint8_t i = 0; i < hand_size; i++) { + if (_hands[_turn][i].rank == card.rank && _hands[_turn][i].suit == card.suit) { + return i; + } + } + return -1; } template @@ -220,7 +221,7 @@ namespace Hanabi { ASSERT(index < _hands[_turn].size()); // update card position of the card we are about to discard - if constexpr(update_card_positions) { + if constexpr (update_card_positions) { const Card discarded = _hands[_turn][index]; if (!discarded.initial_trash) { if (discarded.in_starting_hand) { @@ -247,7 +248,7 @@ namespace Hanabi { _draw_pile.back().multiplicity--; } - if constexpr(update_card_positions) { + if constexpr (update_card_positions) { // update card position of the drawn card if (!draw.card.initial_trash) { ASSERT(draw.card.in_starting_hand == false); @@ -259,7 +260,7 @@ namespace Hanabi { _hands[_turn][index] = draw.card; - if(_draw_pile.empty()) { + if (_draw_pile.empty()) { // Note the +1, since we will immediately decrement this when moving to the next player _endgame_turns_left = num_players + 1; } @@ -301,7 +302,8 @@ namespace Hanabi { ASSERT(_card_positions_hands[discarded_card.local_index] == false); _card_positions_hands[discarded_card.local_index] = true; } else { - auto hand_card_it = std::ranges::find(_card_positions_draw[discarded_card.local_index], trash_or_play_stack); + auto hand_card_it = std::ranges::find(_card_positions_draw[discarded_card.local_index], + trash_or_play_stack); ASSERT(hand_card_it != _card_positions_draw[discarded_card.local_index].end()); *hand_card_it = _turn; } @@ -314,16 +316,16 @@ namespace Hanabi { void HanabiState::normalize_draw_and_positions() { // Note that this function does not have to be particularly performant, we only call it once to initialize. const Card trash = [this]() -> Card { - for(suit_t suit = 0; suit < num_suits; suit++) { - if(_stacks[suit] < starting_card_rank) { + for (suit_t suit = 0; suit < num_suits; suit++) { + if (_stacks[suit] < starting_card_rank) { return {suit, starting_card_rank - 1, 0, false, true}; } } - return {0,0}; + return {0, 0}; }(); CardArray nums_in_draw_pile; - for(const auto [card, multiplicity] : _draw_pile) { + for (const auto [card, multiplicity]: _draw_pile) { if (_stacks[card.suit] > card.rank) { nums_in_draw_pile[card] += multiplicity; } else { @@ -333,27 +335,28 @@ namespace Hanabi { // Prepare draw pile _draw_pile.clear(); - for(suit_t suit = 0; suit < num_suits; suit++) { - for(rank_t rank = 0; rank < starting_card_rank; rank++) { - Card card {suit, rank, static_cast(_card_positions_draw.size()), false, is_trash(card)}; + for (suit_t suit = 0; suit < num_suits; suit++) { + for (rank_t rank = 0; rank < starting_card_rank; rank++) { + Card card{suit, rank, static_cast(_card_positions_draw.size()), false, is_trash(card)}; if (nums_in_draw_pile[card] > 0) { _draw_pile.push_back({card, nums_in_draw_pile[card]}); - if(!is_trash(card)) { + if (!is_trash(card)) { _card_positions_draw.push_back({nums_in_draw_pile[card], draw_pile}); } } } } + _initial_draw_pile_size = _weighted_draw_pile_size; // Prepare cards in hands - for(player_t player = 0; player < num_players; player++) { - for(Card& card : _hands[player]) { + for (player_t player = 0; player < num_players; player++) { + for (Card &card: _hands[player]) { card.initial_trash = is_trash(card); card.in_starting_hand = true; // Needed to check for dupes in same hand boost::container::static_vector good_cards_in_hand; - if(!is_trash(card)) { - if(std::count(good_cards_in_hand.begin(), good_cards_in_hand.end(), card) > 0) { + if (!is_trash(card)) { + if (std::count(good_cards_in_hand.begin(), good_cards_in_hand.end(), card) > 0) { // This card is already in hand, so just replace the second copy by some trash card = trash; } else { @@ -366,13 +369,14 @@ namespace Hanabi { } } _card_positions_hands.reset(); - for(size_t i = 0; i < _num_useful_cards_in_starting_hands; i++) { + for (size_t i = 0; i < _num_useful_cards_in_starting_hands; i++) { _card_positions_hands[i] = true; } } template - void HanabiState::revert_play(const BacktrackAction& action, bool was_on_8_clues) { + void + HanabiState::revert_play(const BacktrackAction &action, bool was_on_8_clues) { ASSERT(!was_on_8_clues or _num_clues == 8); decr_turn(); if (action.discarded.rank == 0 and not was_on_8_clues) { @@ -384,7 +388,7 @@ namespace Hanabi { } template - void HanabiState::revert_discard(const BacktrackAction& action) { + void HanabiState::revert_discard(const BacktrackAction &action) { decr_turn(); ASSERT(_num_clues > 0); _num_clues--; @@ -399,15 +403,26 @@ namespace Hanabi { _num_clues++; } - #define UPDATE_PROBABILITY(new_probability) \ - best_probability = std::max(best_probability, new_probability); \ - if (best_probability == 1) { \ - return best_probability; \ - } + #define RETURN_PROBABILITY \ + if (_position_tablebase.contains(id_of_state)) { \ + ASSERT(_position_tablebase[id_of_state] == best_probability); \ + } else { \ + _position_tablebase[id_of_state] = best_probability; \ + } \ + return best_probability; + + + #define UPDATE_PROBABILITY(new_probability) \ + best_probability = std::max(best_probability, new_probability); \ + if (best_probability == 1) { \ + RETURN_PROBABILITY; \ + } template double HanabiState::backtrack(size_t depth) { _enumerated_states++; + const unsigned long id_of_state = unique_id(); + if (_score == 5 * num_suits) { return 1; } @@ -485,13 +500,48 @@ namespace Hanabi { UPDATE_PROBABILITY(probability_stall); } + _position_tablebase[id_of_state] = best_probability; return best_probability; } template std::uint64_t HanabiState::unique_id() const { - unsigned long id = _card_positions_hands.to_ulong(); + unsigned long id = 0; + + // encode all positions of cards that started in draw pile + for(const auto & positions: _card_positions_draw) { + for(player_t player : positions) { + id *= num_players + 2; + id += player; + } + } + + // encode number of clues + id *= max_num_clues + 1; + id += _num_clues; + + // encode draw pile size + id *= _initial_draw_pile_size; + id += _weighted_draw_pile_size; + + // encode positions of cards that started in hands + id = id << _num_useful_cards_in_starting_hands; + id += _card_positions_hands.to_ulong(); + + id *= num_players; + id += _turn; + + // The id is unique now, since for all relevant cards, we know their position (including if they are played), + // the number of clues, the draw pile size and whose turn it is. + // This already uniquely determines the current players position, assuming that we never discard good cards + // (and only play them) + return id; } + template + std::unordered_map HanabiState::visited_states() const { + return _position_tablebase; + } + } // namespace Hanabi \ No newline at end of file diff --git a/main.cpp b/main.cpp index eac3c1a..34d6376 100644 --- a/main.cpp +++ b/main.cpp @@ -21,6 +21,7 @@ namespace Hanabi { std::cout.precision(10); std::cout << "Probability with optimal play: " << res << std::endl; std::cout << "Enumerated " << game->enumerated_states() << " states" << std::endl; + std::cout << "Visited " << game->visited_states().size() << " unique game states. " << std::endl; } void print_sizes() {