// Endgame-Analyzer/game_state.hpp
// (483 lines, 19 KiB, C++)

#include <algorithm>
#include <iterator>
#include "myassert.h"
#include "game_state.h"
#include <vector>
namespace Hanabi {
std::ostream& operator<<(std::ostream& os, HanabiStateIF const& hanabi_state) {
hanabi_state.print(os);
return os;
}
// Pre-increment: raises the rank of this card by one and returns the card itself.
Card &Card::operator++() {
  ++rank;
  return *this;
}
// Post-increment: raises the rank of this card by one, but returns a copy of
// the card as it was before the increment.
const Card Card::operator++(int) {
  const Card before_increment = *this;
  ++rank;
  return before_increment;
}
// Prints a card as the initial of its suit (looked up in suit_initials)
// immediately followed by the number `5 - card.rank`.
std::ostream &operator<<(std::ostream &os, const Card &card) {
  return os << suit_initials[card.suit] << (5 - card.rank);
}
// Prints the stacks as a comma-separated list with one entry per stack, each
// entry being `starting_card_rank - stacks[i]`.
// An empty container prints nothing (the previous `size() - 1` loop bound
// underflowed in that case and `back()` was called on an empty container).
template<size_t num_suits>
std::ostream& operator<<(std::ostream &os, const Stacks<num_suits> &stacks) {
  for (size_t i = 0; i < stacks.size(); i++) {
    if (i > 0) {
      // Separator goes *between* entries only, never after the last one
      os << ", ";
    }
    os << starting_card_rank - stacks[i];
  }
  return os;
}
// Assigns `val` to every (suit, rank) entry of the array, for all suits below
// num_suits and all ranks below starting_card_rank.
template<suit_t num_suits, typename T>
void CardArray<num_suits, T>::fill(T val) {
  for (size_t suit = 0; suit != num_suits; ++suit) {
    auto &entries_of_suit = _array[suit];
    for (rank_t rank = 0; rank != starting_card_rank; ++rank) {
      entries_of_suit[rank] = val;
    }
  }
}
// Constructs the array with every entry set to `default_val` (see fill()).
template<suit_t num_suits, typename T>
CardArray<num_suits, T>::CardArray(T default_val) {
  this->fill(default_val);
}
// Read-only access to the entry stored for `card`, addressed by the card's
// suit and rank. No bounds checking is performed here.
template<suit_t num_suits, typename T>
const T& CardArray<num_suits, T>::operator[](const Card &card) const {
  const auto &entries_of_suit = _array[card.suit];
  return entries_of_suit[card.rank];
}
// Mutable access to the entry stored for `card`, addressed by the card's
// suit and rank. No bounds checking is performed here.
template<suit_t num_suits, typename T>
T& CardArray<num_suits, T>::operator[](const Card &card) {
  auto &entries_of_suit = _array[card.suit];
  return entries_of_suit[card.rank];
}
// Sets up a fresh game from `deck`:
//  - all stacks start at starting_card_rank (nothing played yet),
//  - every deck card becomes its own draw pile entry with multiplicity 1,
//  - each player is then dealt hand_size cards from the front of the draw pile
//    (without card position tracking).
// The pace is initialised to deck.size() - 5 * num_suits - num_players * (hand_size - 1).
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> &deck):
  _turn(0),
  _num_clues(max_num_clues),
  _weighted_draw_pile_size(deck.size()),
  _stacks(),
  _hands(),
  _draw_pile(),
  _endgame_turns_left(no_endgame),
  _pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)),
  _score(0) {
  std::ranges::fill(_stacks, starting_card_rank);

  for (const Card &deck_card : deck) {
    _draw_pile.push_back({deck_card, 1});
  }

  // Deal the starting hands: draw() always fills the hand of the player whose
  // turn it is, so we advance the turn after each completed hand.
  for (player_t dealt_hands = 0; dealt_hands != num_players; ++dealt_hands) {
    for (std::uint8_t slot = 0; slot != hand_size; ++slot) {
      draw<false>(slot);
    }
    incr_turn();
  }

  // After num_players turn increments we must have wrapped around to the first player
  ASSERT(_turn == 0);
}
// Spends one clue token and passes the turn to the next player.
// Precondition (checked by ASSERT): at least one clue token is available.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::clue() {
  ASSERT(_num_clues > 0);
  _num_clues--;
  incr_turn();
}
// Passes the turn to the next player (cyclically). If the endgame countdown is
// running (i.e. _endgame_turns_left is not no_endgame), one turn is consumed.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::incr_turn() {
  _turn = (_turn + 1) % num_players;
  if (_endgame_turns_left == no_endgame) {
    return;
  }
  _endgame_turns_left--;
}
// Inverse of incr_turn(): hands the turn back to the previous player
// (cyclically) and, if the endgame countdown is running, gives back one turn.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::decr_turn() {
  // Adding num_players before subtracting keeps the intermediate value non-negative
  _turn = (_turn + num_players - 1) % num_players;
  if (_endgame_turns_left == no_endgame) {
    return;
  }
  _endgame_turns_left++;
}
// A card is playable iff its rank is exactly one below the current value of
// the stack of its suit (stack values are decremented whenever a card is played).
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_playable(const Hanabi::Card &card) const {
  return card.rank + 1 == _stacks[card.suit];
}
// Returns the value of the _enumerated_states counter, which backtrack()
// increments once per invocation.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::enumerated_states() const {
  return this->_enumerated_states;
}
// A card is trash iff its rank is not below the current value of the stack of
// its suit, i.e. the stack has already progressed to (or past) this card.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_trash(const Hanabi::Card &card) const {
  return !(card.rank < _stacks[card.suit]);
}
// Plays the card at position `index` of the current player's hand, without
// card position tracking.
// If the card is playable this is a regular play. Otherwise the card is simply
// replaced by a fresh draw and the turn advances; stacks, score and clues are
// left untouched in that case.
// Returns the information needed to undo the action (played card, hand index
// and the multiplicity reported by draw()).
// NOTE(review): a non-playable play does not decrement _pace, unlike a
// discard — confirm that this is intended.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
BacktrackAction HanabiState<num_suits, num_players, hand_size>::play(Hanabi::hand_index_t index) {
  const Card card = _hands[_turn][index];
  if (is_playable(card)) {
    return play_and_potentially_update<false>(index);
  }
  BacktrackAction misplay{card, index, draw<false>(index)};
  incr_turn();
  return misplay;
}
// Plays the (playable) card at position `index` of the current player's hand:
// advances the stack of its suit, increases the score, awards a clue for a
// rank-0 card unless the clue count is already at max_num_clues, draws a
// replacement card and passes the turn.
// If `update_card_positions` is true, draw() additionally keeps the card
// position bookkeeping up to date.
// Preconditions (checked by ASSERT): `index` is a valid hand position and the
// card there is playable.
// Returns the information needed to undo the play via revert_play().
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
template<bool update_card_positions>
BacktrackAction HanabiState<num_suits, num_players, hand_size>::play_and_potentially_update(hand_index_t index) {
  ASSERT(index < _hands[_turn].size());
  const Card played = _hands[_turn][index];
  ASSERT(is_playable(played));

  --_stacks[played.suit];
  ++_score;

  // Playing the last card of a stack (rank 0) gives back a clue, capped at the maximum
  const bool completes_stack = played.rank == 0;
  if (completes_stack and _num_clues < max_num_clues) {
    ++_num_clues;
  }

  // The draw has to happen before the turn advances, since draw() refills the
  // hand of the player whose turn it currently is.
  BacktrackAction action{played, index, draw<update_card_positions>(index)};
  incr_turn();
  return action;
}
// Discards the card at position `index` of the current player's hand, without
// card position tracking. Returns the information needed to undo the discard.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
BacktrackAction HanabiState<num_suits, num_players, hand_size>::discard(std::uint8_t index) {
  BacktrackAction action = discard_and_potentially_update<false>(index);
  return action;
}
// Discards the card at position `index` of the current player's hand: gains a
// clue, lowers the pace by one, draws a replacement card and passes the turn.
// If `update_card_positions` is true, draw() additionally keeps the card
// position bookkeeping up to date.
// Preconditions (checked by ASSERT): `index` is a valid hand position and the
// clue count is not at max_num_clues.
// Returns the information needed to undo the discard via revert_discard().
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
template<bool update_card_positions>
BacktrackAction HanabiState<num_suits, num_players, hand_size>::discard_and_potentially_update(hand_index_t index) {
  ASSERT(index < _hands[_turn].size());
  ASSERT(_num_clues != max_num_clues);

  const Card thrown_away = _hands[_turn][index];
  ++_num_clues;
  --_pace;

  // The draw has to happen before the turn advances, since draw() refills the
  // hand of the player whose turn it currently is.
  BacktrackAction action{thrown_away, index, draw<update_card_positions>(index)};
  incr_turn();
  return action;
}
// Searches the hand of the player whose turn it is for a card with the same
// suit and rank as `card` (other card attributes are ignored).
// Returns the first matching hand position, or -1 converted to std::uint8_t
// (i.e. 255) if there is no such card.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint8_t HanabiState<num_suits, num_players, hand_size>::find_card_in_hand(
const Hanabi::Card &card) const {
  const auto &current_hand = _hands[_turn];
  for (std::uint8_t position = 0; position < hand_size; ++position) {
    const Card &candidate = current_hand[position];
    if (candidate.rank == card.rank && candidate.suit == card.suit) {
      return position;
    }
  }
  return static_cast<std::uint8_t>(-1);
}
// Writes a human-readable dump of the state to `os`: stacks with score, clue
// count and turn on the first line, the draw pile (with multiplicities larger
// than one in parentheses) and its weighted size on the second line, and all
// hands, separated by " | ", on a third line that is not newline-terminated.
// Numeric members are printed through unary plus, presumably so that narrow
// integer types show up as numbers rather than characters.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::print(std::ostream &os) const {
  os << "Stacks: " << _stacks << " (score " << +_score << ")"
     << ", clues: " << +_num_clues << ", turn: " << +_turn << std::endl;

  os << "Draw pile: ";
  for (const auto &entry : _draw_pile) {
    os << entry.card;
    if (entry.multiplicity > 1) {
      os << " (" << +entry.multiplicity << ")";
    }
    os << ", ";
  }
  os << "(size " << +_weighted_draw_pile_size << ")" << std::endl;

  os << "Hands: ";
  for (const auto &player_hand : _hands) {
    for (const auto &hand_card : player_hand) {
      os << hand_card << ", ";
    }
    os << " | ";
  }
}
// Replaces the card at position `index` of the current player's hand by the
// front card of the draw pile, if there is one.
//
// The draw pile stores (card, multiplicity) entries. Drawing takes one copy of
// the front entry; if copies remain, the entry is moved to the back of the
// pile with its multiplicity reduced by one. When the pile becomes empty by
// this draw, the endgame countdown is started.
//
// If `update_card_positions` is true, the position bookkeeping is updated for
// both the card leaving the hand and the card entering it (cards flagged as
// initial_trash are not tracked).
//
// Returns the multiplicity the drawn entry had before the draw, or 0 if the
// draw pile was empty (in which case the hand is left unchanged).
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
template<bool update_card_positions>
std::uint8_t HanabiState<num_suits, num_players, hand_size>::draw(uint8_t index) {
  ASSERT(index < _hands[_turn].size());

  // Bookkeeping for the card that is about to leave the hand
  if constexpr(update_card_positions) {
    const Card leaving = _hands[_turn][index];
    if (!leaving.initial_trash) {
      const auto start_hand_index = leaving.local_index_among_good_starting_hand_cards;
      if (start_hand_index != not_in_starting_hand) {
        // Card was part of a starting hand: just clear its bit
        ASSERT(_card_positions_hands[start_hand_index] == true);
        _card_positions_hands[start_hand_index] = false;
      } else {
        // Card came from the draw pile: one copy held by the current player is now gone
        auto &positions = _card_positions_draw[leaving];
        auto held_copy_it = std::ranges::find(positions, _turn);
        ASSERT(held_copy_it != positions.end());
        *held_copy_it = trash_or_play_stack;
      }
    }
  }

  // Nothing to draw: the slot keeps its old content
  if (_draw_pile.empty()) {
    return 0;
  }

  --_weighted_draw_pile_size;
  const CardMultiplicity top = _draw_pile.front();
  _draw_pile.pop_front();
  ASSERT(top.multiplicity > 0);
  if (top.multiplicity > 1) {
    // Remaining copies of this card go to the back of the pile
    _draw_pile.push_back(top);
    _draw_pile.back().multiplicity--;
  }

  // Bookkeeping for the card that enters the hand
  if constexpr(update_card_positions) {
    if (!top.card.initial_trash) {
      auto &positions = _card_positions_draw[top.card];
      auto pile_copy_it = std::ranges::find(positions, draw_pile);
      ASSERT(pile_copy_it != positions.end());
      *pile_copy_it = _turn;
    }
  }

  _hands[_turn][index] = top.card;

  if (_draw_pile.empty()) {
    // Note the +1, since we will immediately decrement this when moving to the next player
    _endgame_turns_left = num_players + 1;
  }
  return top.multiplicity;
}
// Undoes a draw() into position `index` of the current player's hand and puts
// `discarded_card` (the card that occupied the slot before) back there.
//
// Both callers in this file (revert_play, revert_discard) call decr_turn()
// first, so _turn refers to the player who performed the action.
// A card was drawn by that action iff the endgame countdown is either not
// running or sits at num_players + 1 (the value draw() sets when it takes the
// last card). In that case the card currently in the slot is returned to the
// back of the draw pile and the countdown is reset to no_endgame. Otherwise
// nothing was drawn and the slot must still hold `discarded_card`.
//
// Card position bookkeeping is always reverted here (cards flagged as
// initial_trash are not tracked).
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_draw(std::uint8_t index, Card discarded_card) {
  const bool card_was_drawn =
      _endgame_turns_left == num_players + 1 || _endgame_turns_left == no_endgame;

  if (card_was_drawn) {
    ASSERT(index < _hands[_turn].size());
    const Card &drawn = _hands[_turn][index];

    // Return the drawn card to the back of the draw pile, merging it with the
    // last entry if that entry holds the same card (suit and rank).
    const bool merge_with_last_entry = !_draw_pile.empty()
        and _draw_pile.back().card.suit == drawn.suit
        and _draw_pile.back().card.rank == drawn.rank;
    if (merge_with_last_entry) {
      _draw_pile.back().multiplicity++;
    } else {
      _draw_pile.push_back({drawn, 1});
    }

    // The copy held by the current player is back in the draw pile
    if (!drawn.initial_trash) {
      auto &positions = _card_positions_draw[drawn];
      auto held_copy_it = std::ranges::find(positions, _turn);
      ASSERT(held_copy_it != positions.end());
      *held_copy_it = draw_pile;
    }

    _weighted_draw_pile_size++;
    _endgame_turns_left = no_endgame;
  } else {
    ASSERT(_hands[_turn][index] == discarded_card);
  }

  // The card that had left the hand is held by the current player again
  if (!discarded_card.initial_trash) {
    const auto start_hand_index = discarded_card.local_index_among_good_starting_hand_cards;
    if (start_hand_index != not_in_starting_hand) {
      ASSERT(_card_positions_hands[start_hand_index] == false);
      _card_positions_hands[start_hand_index] = true;
    } else {
      auto &positions = _card_positions_draw[discarded_card];
      auto gone_copy_it = std::ranges::find(positions, trash_or_play_stack);
      ASSERT(gone_copy_it != positions.end());
      *gone_copy_it = _turn;
    }
  }

  _hands[_turn][index] = discarded_card;
}
// Brings the draw pile and the hands into a normalized form and initialises
// the card position bookkeeping:
//  - The draw pile is rebuilt with one entry per distinct card, ordered by
//    suit and then rank. All cards that are already trash are merged into
//    copies of one representative trash card.
//  - _card_positions_draw gets one slot (initialised to draw_pile) per
//    non-trash copy in the draw pile.
//  - Every hand card is flagged with whether it is trash right now. Each good
//    card gets a running index among the good starting hand cards; a second
//    copy of a good card within the same hand is replaced by the
//    representative trash card.
//  - Finally all bits of _card_positions_hands are set.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::normalize_draw_and_positions() {
  // Note that this function does not have to be particularly performant, we only call it once to initialize.

  // Representative trash card: the first card (highest rank value) of the
  // first suit whose stack has already been started.
  const Card trash = [this]() -> Card {
    for(suit_t suit = 0; suit < num_suits; suit++) {
      if(_stacks[suit] < starting_card_rank) {
        return {suit, starting_card_rank - 1, not_in_starting_hand, true};
      }
    }
    // No stack has been started, so there is no trash card at all
    return {0,0};
  }();
  // False exactly for the fallback card above, which must never be put into a hand
  const bool trash_exists = is_trash(trash);

  // Explicitly start counting at zero instead of relying on what the default
  // constructor of CardArray does with its storage.
  CardArray<num_suits, std::uint8_t> nums_in_draw_pile(std::uint8_t{0});
  for(const auto [card, multiplicity] : _draw_pile) {
    if (_stacks[card.suit] > card.rank) {
      nums_in_draw_pile[card] += multiplicity;
    } else {
      nums_in_draw_pile[trash] += multiplicity;
    }
  }

  // Prepare draw pile
  _draw_pile.clear();
  for(suit_t suit = 0; suit < num_suits; suit++) {
    for(rank_t rank = 0; rank < starting_card_rank; rank++) {
      // Build the card first and only then ask whether it is trash: the card
      // must not be read inside its own initializer.
      Card card {suit, rank, not_in_starting_hand, false};
      const bool card_is_trash = is_trash(card);
      card.initial_trash = card_is_trash;

      _card_positions_draw[card].clear();
      if (nums_in_draw_pile[card] > 0) {
        _draw_pile.push_back({card, nums_in_draw_pile[card]});
        if(!card_is_trash) {
          _card_positions_draw[card].resize(nums_in_draw_pile[card], draw_pile);
        }
      }
    }
  }

  // Prepare cards in hands
  uint8_t local_index_among_good_starting_hand_cards = 0;
  for(player_t player = 0; player < num_players; player++) {
    // Needed to check for dupes in same hand. This has to live for the whole
    // hand (not for a single card), otherwise it is always empty when queried.
    boost::container::static_vector<Card, hand_size> good_cards_in_hand;
    for(Card& card : _hands[player]) {
      card.initial_trash = is_trash(card);
      if(!is_trash(card)) {
        // Compare suit and rank only: the stored copies already carry their
        // local index, which the incoming card does not have yet.
        const bool already_in_hand = std::any_of(
            good_cards_in_hand.begin(), good_cards_in_hand.end(),
            [&card](const Card &other) {
              return other.suit == card.suit && other.rank == card.rank;
            });
        if(already_in_hand && trash_exists) {
          // This card is already in hand, so just replace the second copy by some trash
          card = trash;
        } else {
          card.local_index_among_good_starting_hand_cards = local_index_among_good_starting_hand_cards;
          local_index_among_good_starting_hand_cards++;
          good_cards_in_hand.push_back(card);
        }
      }
    }
  }

  // Mark all good starting hand cards as still being in hand
  _card_positions_hands.reset();
  _card_positions_hands.flip();
}
// Undoes a successful play described by `action`.
// `was_on_8_clues` tells whether the clue count was already at its maximum
// before the play; in that case playing a rank-0 card did not award a clue,
// so none must be taken away again.
// Steps: hand the turn back, take back the clue if one was awarded, undo the
// draw (restoring the played card to the hand), move the stack of the card's
// suit back by one and decrease the score.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_play(const BacktrackAction& action, bool was_on_8_clues) {
  // Use the same cap as play_and_potentially_update() instead of a literal 8
  ASSERT(!was_on_8_clues or _num_clues == max_num_clues);
  decr_turn();
  if (action.discarded.rank == 0 and not was_on_8_clues) {
    _num_clues--;
  }
  revert_draw(action.index, action.discarded);
  _stacks[action.discarded.suit]++;
  _score--;
}
// Undoes a discard described by `action`: hands the turn back, takes away the
// clue gained by the discard, restores the pace and undoes the draw (which
// puts the discarded card back into the hand).
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_discard(const BacktrackAction& action) {
  decr_turn();

  ASSERT(_num_clues > 0);
  --_num_clues;
  ++_pace;

  revert_draw(action.index, action.discarded);
}
// Undoes a clue(): hands the turn back and returns the spent clue token.
// The ASSERT checks that the clue count is below max_num_clues before the
// token is returned.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_clue() {
  decr_turn();
  ASSERT(_num_clues < max_num_clues);
  ++_num_clues;
}
// Helper for backtrack(): folds `new_probability` into the local variable
// `best_probability` (which must be in scope at the point of use) and returns
// from the *enclosing function* right away once a probability of 1 has been
// reached, since no other action can do better.
// The body is wrapped in do { ... } while (false) so that the macro behaves
// like a single statement (e.g. inside an unbraced if/else) and requires the
// trailing semicolon at the call site. The argument is parenthesized so that
// arbitrary expressions can be passed.
#define UPDATE_PROBABILITY(new_probability) \
  do { \
    best_probability = std::max(best_probability, (new_probability)); \
    if (best_probability == 1) { \
      return best_probability; \
    } \
  } while (false)
// Exhaustively searches the game tree from the current state and returns the
// best achievable probability of reaching the maximum score (5 * num_suits).
//
// Actions considered for the player to move:
//  1. playing any playable card of the hand,
//  2. discarding the first trash card of the hand (only if the pace is
//     positive and the clue count is below max_num_clues),
//  3. giving a clue (only if a clue token is available).
// For plays and discards with a non-empty draw pile, the result is the average
// over all draw pile entries, weighted by their multiplicity: every iteration
// of the inner loops draws the current front entry, and undoing the draw puts
// that card to the back of the pile, so each entry is visited exactly once.
//
// The state is restored before the function returns, including on the early
// returns triggered by UPDATE_PROBABILITY once a probability of 1 is found.
// `depth` is the current recursion depth; it is only passed on.
// Every call increments _enumerated_states.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
double HanabiState<num_suits, num_players, hand_size>::backtrack(size_t depth) {
  _enumerated_states++;
  if (_score == 5 * num_suits) {
    return 1;
  }
  // Lost: negative pace, or no turns left in the endgame
  if(_pace < 0 || _endgame_turns_left == 0) {
    return 0;
  }

  // TODO: Have some endgame analysis here?

  double best_probability = 0;
  // Work on a copy of the hand: the hand itself is modified (and restored)
  // by the draws performed below.
  const std::array<Card, hand_size> hand = _hands[_turn];

  // First, check for playables
  for(std::uint8_t index = 0; index < hand_size; index++) {
    if(is_playable(hand[index])) {
      // Has to be recorded before the play, since reverting needs to know
      // whether a clue could have been awarded. Compared against the same cap
      // that play_and_potentially_update() uses.
      const bool on_8_clues = _num_clues == max_num_clues;
      if (_draw_pile.empty()) {
        BacktrackAction action = play_and_potentially_update<true>(index);
        const double probability_for_this_play = backtrack(depth + 1);
        revert_play(action, on_8_clues);
        UPDATE_PROBABILITY(probability_for_this_play);
      } else {
        double sum_of_probabilities = 0;
        uint8_t sum_of_mults = 0;
        for (size_t i = 0; i < _draw_pile.size(); i++) {
          BacktrackAction action = play_and_potentially_update<true>(index);
          sum_of_probabilities += backtrack(depth + 1) * action.multiplicity;
          sum_of_mults += action.multiplicity;
          revert_play(action, on_8_clues);
          ASSERT(sum_of_mults <= _weighted_draw_pile_size);
        }
        ASSERT(sum_of_mults == _weighted_draw_pile_size);
        const double probability_for_this_play = sum_of_probabilities / _weighted_draw_pile_size;
        UPDATE_PROBABILITY(probability_for_this_play);
      }
    }
  }

  // Check for discards now
  if(_pace > 0 and _num_clues < max_num_clues) {
    for(std::uint8_t index = 0; index < hand_size; index++) {
      if (is_trash(hand[index])) {
        if (_draw_pile.empty()) {
          BacktrackAction action = discard_and_potentially_update<true>(index);
          const double probability_for_this_discard = backtrack(depth + 1);
          revert_discard(action);
          UPDATE_PROBABILITY(probability_for_this_discard);
        } else {
          double sum_of_probabilities = 0;
          uint8_t sum_of_mults = 0;
          for (size_t i = 0; i < _draw_pile.size(); i++) {
            BacktrackAction action = discard_and_potentially_update<true>(index);
            sum_of_probabilities += backtrack(depth + 1) * action.multiplicity;
            sum_of_mults += action.multiplicity;
            revert_discard(action);
          }
          ASSERT(sum_of_mults == _weighted_draw_pile_size);
          const double probability_discard = sum_of_probabilities / _weighted_draw_pile_size;
          UPDATE_PROBABILITY(probability_discard);
        }
        // All discards are equivalent, do not continue searching for different trash
        break;
      }
    }
  }

  // Last option is to stall
  if(_num_clues > 0) {
    clue();
    const double probability_stall = backtrack(depth + 1);
    revert_clue();
    UPDATE_PROBABILITY(probability_stall);
  }

  return best_probability;
}
} // namespace Hanabi