// Implementation of the Hanabi game state and of the exhaustive endgame
// search (namespace Hanabi): stream operators, Card / CardArray /
// BacktrackAction helpers and the HanabiState member functions.
//
// NOTE(review): this copy of the file looks damaged by extraction:
//   * everything that was inside angle brackets is missing (the targets of
//     four #include directives, all template parameter / argument lists, the
//     target type of static_cast(...)); restore them from the original file
//     and game_state.h,
//   * many definitions are folded onto a few physical lines, so every `//`
//     comment now runs to the end of its physical line and the `\`
//     continuations of the two #define macros are no longer followed by a
//     line break.
//   The code below is left exactly as found; only comment lines were added.
//
// Rank convention visible in this file: ranks are stored "counting down".
// A card prints as `5 - card.rank`, a stack prints as
// `starting_card_rank - stack`, a card is playable when
// `card.rank == _stacks[card.suit] - 1`, playing decrements the stack, and
// rank 0 is the last card of a stack.
//
// Definitions that start on the next physical line:
//   operator<<(ostream, HanabiStateIF) - forwards to hanabi_state.print(os).
//   Card::operator++ (prefix/postfix)  - increments `rank`; the postfix form
//                                        returns the value before the change.
//   operator<<(ostream, Card)          - suit initial followed by 5 - rank.
//   operator<<(ostream, Stacks)        - comma separated stack heights.
//       NOTE(review): the loop bound is `stacks.size() - 1` and `back()` is
//       used, so an empty container is not handled.
//   CardArray::fill / constructor      - set every [suit][rank] entry
//                                        (suit < num_suits,
//                                        rank < starting_card_rank) to a value.
//   CardArray::operator[]              - element access by card.suit/card.rank.
//   BacktrackAction constructor        - stores action type, card and index.
//   HanabiState constructor            - initialises the members, sets every
//       stack to starting_card_rank, copies the deck into _draw_pile with
//       multiplicity 1 and then deals the starting hands (continued below).
#include #include #include "myassert.h" #include "game_state.h" #include #include namespace Hanabi { std::ostream &operator<<(std::ostream &os, HanabiStateIF const &hanabi_state) { hanabi_state.print(os); return os; } Card &Card::operator++() { rank++; return *this; } const Card Card::operator++(int) { Card ret = *this; rank++; return ret; } std::ostream &operator<<(std::ostream &os, const Card &card) { os << suit_initials[card.suit] << 5 - card.rank; return os; } template std::ostream &operator<<(std::ostream &os, const Stacks &stacks) { for (size_t i = 0; i < stacks.size() - 1; i++) { os << starting_card_rank - stacks[i] << ", "; } os << starting_card_rank - stacks.back(); return os; } template void CardArray::fill(T val) { for (size_t suit = 0; suit < num_suits; suit++) { for (rank_t rank = 0; rank < starting_card_rank; rank++) { _array[suit][rank] = val; } } } template CardArray::CardArray(T default_val) { fill(default_val); } template const T &CardArray::operator[](const Card &card) const { return _array[card.suit][card.rank]; }; template T &CardArray::operator[](const Card &card) { return _array[card.suit][card.rank]; }; BacktrackAction::BacktrackAction( Hanabi::ActionType action_type, Hanabi::Card discarded_or_played, Hanabi::hand_index_t index ): action_type(action_type), discarded(discarded_or_played), index(index) { } template HanabiState::HanabiState(const std::vector &deck): _turn(0), _num_clues(max_num_clues), _weighted_draw_pile_size(deck.size()), _stacks(), _hands(), _draw_pile(), _endgame_turns_left(no_endgame), _card_positions_draw(), _card_positions_hands(), _num_useful_cards_in_starting_hands(0), _initial_draw_pile_size(0), _pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)), _score(0), _enumerated_states(0) { std::ranges::fill(_stacks, starting_card_rank); for (const Card &card: deck) { _draw_pile.push_back({card, 1}); } for (player_t player = 0; player < num_players; player++) { for (std::uint8_t index = 0; index < hand_size; 
// (HanabiState constructor, continued) every player fills all hand_size slots
// by drawing; incr_turn() after each player brings _turn back to 0, which the
// closing ASSERT checks.
//
// Definitions that start on the next physical line:
//   clue()          - spends one clue token, logs a clue action (with
//                     unknown_card and index 0) and advances the turn.
//   incr_turn()     - next player (mod num_players); once the endgame counter
//                     is running (!= no_endgame) it is decremented.
//   decr_turn()     - exact inverse of incr_turn().
//   is_playable()   - card.rank == _stacks[card.suit] - 1.
//   enumerated_states() - getter for _enumerated_states.
//   is_trash()      - card.rank >= _stacks[card.suit].
//   play(index)     - plays the card in the current player's slot `index`.
//                     If it is not playable the slot is only refilled by
//                     draw() and the turn advances; nothing is pushed to
//                     _actions_log and no stack changes in that branch.
//   play_and_potentially_update(index) - requires a playable card; decrements
//                     the suit's stack, increments _score, regains a clue when
//                     a rank-0 card is played below max_num_clues, draws a
//                     replacement, logs the play, advances the turn and
//                     returns what draw() returned.
//   discard(index)  - forwards to discard_and_potentially_update.
//   discard_and_potentially_update(index) - requires
//                     _num_clues != max_num_clues; gains a clue, loses one
//                     pace, draws a replacement, logs the discard, advances
//                     the turn and returns what draw() returned
//                     (continued below).
index++) { draw(index); } incr_turn(); } ASSERT(_turn == 0); } template void HanabiState::clue() { ASSERT(_num_clues > 0); --_num_clues; _actions_log.emplace(ActionType::clue, unknown_card, 0); incr_turn(); } template void HanabiState::incr_turn() { _turn = (_turn + 1) % num_players; if (_endgame_turns_left != no_endgame) { _endgame_turns_left--; } } template void HanabiState::decr_turn() { _turn = (_turn + num_players - 1) % num_players; if (_endgame_turns_left != no_endgame) { _endgame_turns_left++; } } template bool HanabiState::is_playable(const Hanabi::Card &card) const { return card.rank == _stacks[card.suit] - 1; } template std::uint64_t HanabiState::enumerated_states() const { return _enumerated_states; } template bool HanabiState::is_trash(const Hanabi::Card &card) const { return card.rank >= _stacks[card.suit]; } template void HanabiState::play(Hanabi::hand_index_t index) { const Card card = _hands[_turn][index]; if (!is_playable(card)) { draw(index); incr_turn(); return; } play_and_potentially_update(index); } template template unsigned long HanabiState::play_and_potentially_update(hand_index_t index) { ASSERT(index < _hands[_turn].size()); const Card played_card = _hands[_turn][index]; ASSERT(is_playable(played_card)); --_stacks[played_card.suit]; _score++; if (played_card.rank == 0 and _num_clues < max_num_clues) { // update clues if we played the last played_card of a stack _num_clues++; } unsigned long multiplicity = draw(index); _actions_log.emplace(ActionType::play, played_card, index); incr_turn(); return multiplicity; } template void HanabiState::discard(std::uint8_t index) { discard_and_potentially_update(index); } template template unsigned long HanabiState::discard_and_potentially_update(hand_index_t index) { ASSERT(index < _hands[_turn].size()); ASSERT(_num_clues != max_num_clues); const Card discarded_card = _hands[_turn][index]; _num_clues++; _pace--; unsigned long multiplicity = draw(index); _actions_log.emplace(ActionType::discard, 
// (discard_and_potentially_update, continued) logs the discard, advances the
// turn and returns the multiplicity reported by draw().
//
// Definitions that start on the next physical line:
//   find_card_in_hand(card) - index of the first card in the current player's
//       hand with the same rank and suit.
//       NOTE(review): "not found" is `return -1` through a std::uint8_t return
//       type, i.e. the caller sees the wrapped-around value, not -1.
//   print(os)  - writes stacks, score, clues, turn, the draw pile (with
//       multiplicities greater than 1 in parentheses), its weighted size and
//       all hands. The unary `+` in front of the counters is presumably there
//       so that char-sized integers print as numbers - confirm the types.
//   draw(index) - replaces the card in slot `index` of the current player.
//       When `update_card_positions` is set, the replaced card's tracked
//       position is cleared first (bit in _card_positions_hands for cards
//       from the starting hands, otherwise its entry in _card_positions_draw
//       changes from _turn to trash_or_play_stack); cards flagged
//       initial_trash are not tracked. If the draw pile is not empty, the
//       front entry is taken and, if it had more than one copy, re-appended
//       at the back with one copy fewer (continued below).
discarded_card, index); incr_turn(); return multiplicity; } template std::uint8_t HanabiState::find_card_in_hand( const Hanabi::Card &card) const { for (std::uint8_t i = 0; i < hand_size; i++) { if (_hands[_turn][i].rank == card.rank && _hands[_turn][i].suit == card.suit) { return i; } } return -1; } template void HanabiState::print(std::ostream &os) const { os << "Stacks: " << _stacks << " (score " << +_score << ")"; os << ", clues: " << +_num_clues << ", turn: " << +_turn << std::endl; os << "Draw pile: "; for (const auto &[card, mul]: _draw_pile) { os << card; if (mul > 1) { os << " (" << +mul << ")"; } os << ", "; } os << "(size " << +_weighted_draw_pile_size << ")" << std::endl; os << "Hands: "; for (const auto &hand: _hands) { for (const auto &card: hand) { os << card << ", "; } os << " | "; } } template template unsigned long HanabiState::draw(uint8_t index) { ASSERT(index < _hands[_turn].size()); // update card position of the card we are about to discard if constexpr (update_card_positions) { const Card discarded = _hands[_turn][index]; if (!discarded.initial_trash) { if (discarded.in_starting_hand) { ASSERT(_card_positions_hands[discarded.local_index] == true); _card_positions_hands[discarded.local_index] = false; } else { auto replaced_card_it = std::ranges::find(_card_positions_draw[discarded.local_index], _turn); ASSERT(replaced_card_it != _card_positions_draw[discarded.local_index].end()); *replaced_card_it = trash_or_play_stack; } } } // draw a new card if the draw pile is not empty if (!_draw_pile.empty()) { --_weighted_draw_pile_size; const CardMultiplicity draw = _draw_pile.front(); _draw_pile.pop_front(); ASSERT(draw.multiplicity > 0); if (draw.multiplicity > 1) { _draw_pile.push_back(draw); _draw_pile.back().multiplicity--; } if constexpr (update_card_positions) { // update card position of the drawn card if (!draw.card.initial_trash) { ASSERT(draw.card.in_starting_hand == false); auto new_card_it = 
// (draw, continued) the drawn card's tracked position changes from draw_pile
// to _turn, the card is stored in the hand slot, and when this draw emptied
// the pile the endgame counter is started at num_players + 1. Returns the
// multiplicity the drawn entry had before one copy was removed, or 0 when the
// pile was already empty (the hand slot is then left unchanged).
//
// Definitions that start on the next physical line:
//   revert_draw(index, discarded_card) - undoes draw() for the current player.
//       If the endgame counter is num_players + 1 or no_endgame, the card now
//       in the slot is put back at the BACK of the draw pile (merged into the
//       last entry when suit and rank match), its tracked position returns to
//       draw_pile, the weighted size grows by one and the endgame counter is
//       reset to no_endgame. Otherwise nothing had been drawn and the slot
//       must still hold `discarded_card`. Finally `discarded_card` gets its
//       tracked position back and is stored in the slot.
//       Unlike draw(), this always updates the position tables.
//   normalize_draw_and_positions() - one-time preparation of the position
//       tracking used by the search (body on the following line).
std::ranges::find(_card_positions_draw[draw.card.local_index], draw_pile); ASSERT(new_card_it != _card_positions_draw[draw.card.local_index].end()); *new_card_it = _turn; } } _hands[_turn][index] = draw.card; if (_draw_pile.empty()) { // Note the +1, since we will immediately decrement this when moving to the next player _endgame_turns_left = num_players + 1; } return draw.multiplicity; } return 0; } template void HanabiState::revert_draw(std::uint8_t index, Card discarded_card) { if (_endgame_turns_left == num_players + 1 || _endgame_turns_left == no_endgame) { // Put the card that is currently in hand back into the draw pile ASSERT(index < _hands[_turn].size()); const Card &drawn = _hands[_turn][index]; // put discarded_card back into draw pile (at the back) if (!_draw_pile.empty() and _draw_pile.back().card.suit == drawn.suit and _draw_pile.back().card.rank == drawn.rank) { _draw_pile.back().multiplicity++; } else { _draw_pile.push_back({drawn, 1}); } if (!drawn.initial_trash) { ASSERT(drawn.in_starting_hand == false); auto drawn_card_it = std::ranges::find(_card_positions_draw[drawn.local_index], _turn); ASSERT(drawn_card_it != _card_positions_draw[drawn.local_index].end()); *drawn_card_it = draw_pile; } _weighted_draw_pile_size++; _endgame_turns_left = no_endgame; } else { ASSERT(_hands[_turn][index] == discarded_card); } if (!discarded_card.initial_trash) { if (discarded_card.in_starting_hand) { ASSERT(_card_positions_hands[discarded_card.local_index] == false); _card_positions_hands[discarded_card.local_index] = true; } else { auto hand_card_it = std::ranges::find(_card_positions_draw[discarded_card.local_index], trash_or_play_stack); ASSERT(hand_card_it != _card_positions_draw[discarded_card.local_index].end()); *hand_card_it = _turn; } } _hands[_turn][index] = discarded_card; } template void HanabiState::normalize_draw_and_positions() { // Note that this function does not have to be particularly performant, we only call it once to initialize. 
// (normalize_draw_and_positions, body) steps, in order:
//   1. choose a representative `trash` card: for the first suit whose stack is
//      below starting_card_rank, the card {suit, starting_card_rank - 1, 0,
//      false, true}; otherwise {0, 0},
//   2. count the draw pile per card, pooling every card whose rank is not
//      below its stack under `trash`,
//   3. rebuild _draw_pile in suit/rank order with these counts; every
//      non-trash card also gets an entry in _card_positions_draw and
//      _good_cards_draw, and its local_index is the size of
//      _card_positions_draw at that moment,
//   4. remember the current weighted size as _initial_draw_pile_size,
//   5. flag every hand card (initial_trash, in_starting_hand) and give
//      non-trash hand cards consecutive local_index values,
//   6. set the first _num_useful_cards_in_starting_hands bits of
//      _card_positions_hands.
//   NOTE(review): `Card card{..., is_trash(card)}` passes `card` to is_trash()
//   inside its own initializer; whether suit/rank are already set at that
//   point depends on how Card is declared (not visible here) - confirm.
//   NOTE(review): `good_cards_in_hand` is declared inside the per-card loop,
//   so it is empty every time std::count runs and the "replace the second
//   copy by some trash" branch cannot be reached as written; the comment next
//   to it suggests it was meant to live for one whole hand.
//
// Definition that starts on the next physical line:
//   revert_play(was_on_8_clues) - undoes the last logged play: steps the turn
//       back, takes back the clue gained for a rank-0 card unless the clue
//       count was already 8 before the play, reverts the draw, raises the
//       stack again and decrements the score (continued below).
const Card trash = [this]() -> Card { for (suit_t suit = 0; suit < num_suits; suit++) { if (_stacks[suit] < starting_card_rank) { return {suit, starting_card_rank - 1, 0, false, true}; } } return {0, 0}; }(); CardArray nums_in_draw_pile; for (const auto [card, multiplicity]: _draw_pile) { if (_stacks[card.suit] > card.rank) { nums_in_draw_pile[card] += multiplicity; } else { nums_in_draw_pile[trash] += multiplicity; } } // Prepare draw pile _draw_pile.clear(); for (suit_t suit = 0; suit < num_suits; suit++) { for (rank_t rank = 0; rank < starting_card_rank; rank++) { Card card{suit, rank, static_cast(_card_positions_draw.size()), false, is_trash(card)}; if (nums_in_draw_pile[card] > 0) { _draw_pile.push_back({card, nums_in_draw_pile[card]}); if (!is_trash(card)) { _card_positions_draw.push_back({nums_in_draw_pile[card], draw_pile}); _good_cards_draw.push_back(card); } } } } _initial_draw_pile_size = _weighted_draw_pile_size; // Prepare cards in hands for (player_t player = 0; player < num_players; player++) { for (Card &card: _hands[player]) { card.initial_trash = is_trash(card); card.in_starting_hand = true; // Needed to check for dupes in same hand boost::container::static_vector good_cards_in_hand; if (!is_trash(card)) { if (std::count(good_cards_in_hand.begin(), good_cards_in_hand.end(), card) > 0) { // This card is already in hand, so just replace the second copy by some trash card = trash; } else { card.local_index = _num_useful_cards_in_starting_hands; _num_useful_cards_in_starting_hands++; good_cards_in_hand.push_back(card); } } } } _card_positions_hands.reset(); for (size_t i = 0; i < _num_useful_cards_in_starting_hands; i++) { _card_positions_hands[i] = true; } } template void HanabiState::revert_play(bool was_on_8_clues) { const BacktrackAction last_action = _actions_log.top(); _actions_log.pop(); ASSERT(last_action.action_type == ActionType::play); ASSERT(!was_on_8_clues or _num_clues == 8); decr_turn(); if (last_action.discarded.rank == 0 and not 
// (revert_play, continued) see the description above.
//
// Definitions that start on the next physical line:
//   revert_discard() - undoes the last logged discard: turn back, one clue
//       less, one pace more, draw reverted.
//   revert_clue()    - undoes the last logged clue: turn back, one clue more.
//   RETURN_PROBABILITY - macro used inside backtrack(): stores
//       best_probability in _position_tablebase under id_of_state (asserting
//       that an existing entry has the same value) and returns it.
//   UPDATE_PROBABILITY(p) - macro used inside backtrack(): keeps the maximum
//       of best_probability and p and returns immediately once it equals 1.
//   backtrack(depth) - returns the best winning probability reachable from
//       the current state and restores the state before returning. Early
//       exits: 1 when _score == 5 * num_suits, 0 when _pace < 0 or no endgame
//       turns are left, otherwise a cached value from _position_tablebase if
//       one exists. `depth` is only passed on as depth + 1 in the visible
//       code.
//       NOTE(review): id_of_state is `unsigned long` while unique_id() returns
//       std::uint64_t - confirm these have the same width on all targets.
was_on_8_clues) { _num_clues--; } revert_draw(last_action.index, last_action.discarded); _stacks[last_action.discarded.suit]++; _score--; } template void HanabiState::revert_discard() { const BacktrackAction last_action = _actions_log.top(); _actions_log.pop(); ASSERT(last_action.action_type == ActionType::discard); decr_turn(); ASSERT(_num_clues > 0); _num_clues--; _pace++; revert_draw(last_action.index, last_action.discarded); } template void HanabiState::revert_clue() { const BacktrackAction last_action = _actions_log.top(); _actions_log.pop(); ASSERT(last_action.action_type == ActionType::clue); decr_turn(); ASSERT(_num_clues < max_num_clues); _num_clues++; } #define RETURN_PROBABILITY \ if (_position_tablebase.contains(id_of_state)) { \ ASSERT(_position_tablebase[id_of_state] == best_probability); \ } \ _position_tablebase[id_of_state] = best_probability; \ \ return best_probability; #define UPDATE_PROBABILITY(new_probability) \ best_probability = std::max(best_probability, new_probability); \ if (best_probability == 1) { \ RETURN_PROBABILITY; \ } template probability_t HanabiState::backtrack(size_t depth) { _enumerated_states++; const unsigned long id_of_state = unique_id(); if (_score == 5 * num_suits) { return 1; } if(_pace < 0 || _endgame_turns_left == 0) { return 0; } if (_position_tablebase.contains(id_of_state)) { return _position_tablebase[id_of_state]; } // TODO: Have some endgame analysis here? 
// (backtrack, continued) candidate actions, tried in this order, keeping the
// maximum probability:
//   * every playable card of the current hand (the hand is copied first, as
//     the state is modified and restored while searching),
//   * one discard, only if _pace > 0 and _num_clues < max_num_clues, and only
//     of the first trash card found (the loop breaks after it).
// With an empty draw pile an action has a single outcome. Otherwise the action
// is applied once per draw-pile entry: draw() takes the front entry and
// revert_draw() returns the card to the back, so _draw_pile.size() iterations
// visit every distinct entry once; the results are averaged, weighted by the
// entry multiplicities, over _weighted_draw_pile_size.
// NOTE(review): `on_8_clues` compares against the literal 8 while the rest of
// the file uses max_num_clues - confirm both agree.
// NOTE(review): sum_of_mults is a uint8_t accumulator of `unsigned long`
// multiplicities; it is only used in ASSERTs, but would wrap for a weighted
// draw pile larger than 255 - confirm the maximum deck size.
// First, check if we have any playable cards probability_t best_probability = 0; const std::array hand = _hands[_turn]; // First, check for playables for(std::uint8_t index = 0; index < hand_size; index++) { if(is_playable(hand[index])) { if (_draw_pile.empty()) { bool on_8_clues = _num_clues == 8; play_and_potentially_update(index); const probability_t probability_for_this_play = backtrack(depth + 1); revert_play(on_8_clues); UPDATE_PROBABILITY(probability_for_this_play); } else { probability_t sum_of_probabilities = 0; uint8_t sum_of_mults = 0; for (size_t i = 0; i < _draw_pile.size(); i++) { bool on_8_clues = _num_clues == 8; const unsigned long multiplicity = play_and_potentially_update(index); sum_of_probabilities += backtrack(depth + 1) * multiplicity; sum_of_mults += multiplicity; revert_play(on_8_clues); ASSERT(sum_of_mults <= _weighted_draw_pile_size); } ASSERT(sum_of_mults == _weighted_draw_pile_size); const probability_t probability_for_this_play = sum_of_probabilities / _weighted_draw_pile_size; UPDATE_PROBABILITY(probability_for_this_play); } } } // Check for discards now if(_pace > 0 and _num_clues < max_num_clues) { for(std::uint8_t index = 0; index < hand_size; index++) { if (is_trash(hand[index])) { probability_t sum_of_probabilities = 0; if (_draw_pile.empty()) { discard_and_potentially_update(index); const probability_t probability_for_this_discard = backtrack(depth + 1); revert_discard(); UPDATE_PROBABILITY(probability_for_this_discard); } else { uint8_t sum_of_mults = 0; for (size_t i = 0; i < _draw_pile.size(); i++) { const unsigned long multiplicity = discard_and_potentially_update(index); sum_of_probabilities += backtrack(depth + 1) * multiplicity; sum_of_mults += multiplicity; revert_discard(); } ASSERT(sum_of_mults == _weighted_draw_pile_size); const probability_t probability_discard = sum_of_probabilities / _weighted_draw_pile_size; UPDATE_PROBABILITY(probability_discard); } // All discards are equivalent, do not continue searching for 
// (backtrack, continued) the last candidate action is a stall: giving a clue,
// possible only while _num_clues > 0. The final RETURN_PROBABILITY stores the
// result in _position_tablebase and returns it.
//
// Definition that starts on the next physical line:
//   unique_id() - packs the state into one integer, mixed-radix style:
//       * one digit (base num_players + 2) per tracked copy of every card in
//         _good_cards_draw; copies of a card that is_trash() by now are all
//         encoded as trash_or_play_stack,
//       * the clue count (base max_num_clues + 1),
//       * one combined value for draw pile size / remaining endgame turns,
//       * the bitset of starting-hand cards still in hand (shifted in),
//       * the player to move (base num_players).
//       NOTE(review): the combined value can be as large as
//       _weighted_draw_pile_size + num_players, while the multiplier used for
//       it is _initial_draw_pile_size + num_players; when both sizes are equal
//       the value equals the base - confirm whether the base should be one
//       larger.
//       NOTE(review): no overflow check is visible for `id` (unsigned long,
//       returned as std::uint64_t) - confirm it cannot overflow for the
//       supported game sizes.
different trash break; } } } // Last option is to stall if(_num_clues > 0) { clue(); const probability_t probability_stall = backtrack(depth + 1); revert_clue(); UPDATE_PROBABILITY(probability_stall); } RETURN_PROBABILITY; } template std::uint64_t HanabiState::unique_id() const { unsigned long id = 0; // encode all positions of cards that started in draw pile ASSERT(_card_positions_draw.size() == _good_cards_draw.size()); for(size_t i = 0; i < _card_positions_draw.size(); i++) { for(player_t player : _card_positions_draw[i]) { id *= num_players + 2; // We normalize here: If a card is already played, then the positions of its other copies // do not matter, so we can just pretend that they are all in the trash already. // The resulting states will be equivalent. if (!is_trash(_good_cards_draw[i])) { id += player; } else { id += trash_or_play_stack; } } } // encode number of clues id *= max_num_clues + 1; id += _num_clues; // we can encode draw pile size and extra turn in one metric, since we only have extra turns if draw pile is empty const std::uint8_t draw_pile_size_and_extra_turns = [this]() -> uint8_t { if(_endgame_turns_left == no_endgame) { return _weighted_draw_pile_size + num_players; } else { return _endgame_turns_left; } }(); id *= _initial_draw_pile_size + num_players; id += draw_pile_size_and_extra_turns; // encode positions of cards that started in hands id = id << _num_useful_cards_in_starting_hands; id += _card_positions_hands.to_ulong(); id *= num_players; id += _turn; // The id is unique now, since for all relevant cards, we know their position (including if they are played), // the number of clues, the draw pile size and whose turn it is. 
// (unique_id, continued) returns the packed id.
//
// Definitions that start on the next physical line:
//   visited_states() - returns _position_tablebase by value (a copy).
//   draw_pile_size() - returns _weighted_draw_pile_size.
// The line ends with the closing brace of namespace Hanabi.
// This already uniquely determines the current players position, assuming that we never discard good cards // (and only play them) return id; } template std::unordered_map HanabiState::visited_states() const { return _position_tablebase; } template size_t HanabiState::draw_pile_size() const { return _weighted_draw_pile_size; } } // namespace Hanabi