Endgame-Analyzer/include/game_state.hpp
Maximilian Keßler 5c4a2bb4f7
Store rationals without denominator
Instead of storing a rational for every game state,
we just store how many of the factorial(draw pile size) many
game states can be won.
This allows us to save only one 64-bit integer per game state instead of
two and thus reduces memory consumption of the program significantly.
Also, this makes some computations a bit easier, since we do not have to
normalize when recursing - we can just add the number of winnable states
for each possible draw.

On the other hand, this means that upon lookup, we have to normalize the
stored values again to retrieve the probabilities.
In particular, one needs to know what the draw pile size of the game
state is in order to interpret the value of the state.
2024-02-09 15:58:15 +01:00

1414 lines
47 KiB
C++

#include <algorithm>
#include <iostream>
#include "myassert.h"
#include "game_state.h"
/**
* Compiling with -DINTEGRITY_CHECK_ON will enable exhaustive integrity check while backtracking.
* These significantly slow down performance, so they are deactivated by default.
*/
#ifdef INTEGRITY_CHECK_ON
#define CHECK_DRAW_PILE_INTEGRITY check_draw_pile_integrity()
#else
#define CHECK_DRAW_PILE_INTEGRITY
#endif
#include "factorial.h"
namespace Hanabi
{
/**
 * Writes the stacks to the stream as a comma-separated list.
 * Every entry consists of the suit initial followed by `starting_card_rank - stacks[suit]`.
 */
template<size_t num_suits>
std::ostream & operator<<(std::ostream & os, const Stacks<num_suits> & stacks)
{
  for (size_t suit = 0; suit < stacks.size(); ++suit)
  {
    // Separator goes in front of every entry except the first one
    if (suit > 0)
    {
      os << ", ";
    }
    os << suit_initials[suit] << starting_card_rank - stacks[suit];
  }
  return os;
}
/**
 * Assigns `val` to every (suit, rank) entry with suit < num_suits and rank < starting_card_rank.
 */
template<suit_t num_suits, typename T>
void CardArray<num_suits, T>::fill(T val)
{
  for (size_t suit_index = 0; suit_index != num_suits; ++suit_index)
  {
    auto & ranks_of_suit = _array[suit_index];
    for (rank_t rank_index = 0; rank_index != starting_card_rank; ++rank_index)
    {
      ranks_of_suit[rank_index] = val;
    }
  }
}
/**
 * Constructs the array with every entry set to `default_val`.
 */
template<suit_t num_suits, typename T>
CardArray<num_suits, T>::CardArray(T default_val)
{
  this->fill(default_val);
}
/**
 * Read-only access to the entry stored for `card`, addressed by its suit and rank.
 */
template<suit_t num_suits, typename T>
const T & CardArray<num_suits, T>::operator[](const Card & card) const
{
  const auto & ranks_of_suit = _array[card.suit];
  return ranks_of_suit[card.rank];
}
/**
 * Mutable access to the entry stored for `card`, addressed by its suit and rank.
 */
template<suit_t num_suits, typename T>
T & CardArray<num_suits, T>::operator[](const Card & card)
{
  auto & ranks_of_suit = _array[card.suit];
  return ranks_of_suit[card.rank];
}
/**
 * Stores everything needed to undo one action: its type, the card that left the hand,
 * the hand slot it occupied, and the two flags passed by the caller.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
HanabiState<num_suits, num_players, hand_size>::BacktrackAction::BacktrackAction(
  Hanabi::ActionType action_type
  , Hanabi::Card discarded_or_played
  , Hanabi::hand_index_t index
  , bool was_on_8_clues
  , bool strike
)
  : action_type(action_type)
  , discarded(discarded_or_played)
  , index(index)
  , was_on_8_clues(was_on_8_clues)
  , strike(strike)
{
}
/**
 * Sets up a fresh game from `deck`.
 *
 * Every deck card is put into the draw pile with multiplicity 1, then each player draws
 * `hand_size` cards in turn order. Afterwards the number of copies of each card that
 * are still in hands or in the draw pile is counted into `_num_copies_left`.
 *
 * @param deck   Cards in draw order; the first cards end up in the starting hands.
 * @param config Supplies the clue gain per discard / finished stack, the optional score goal
 *               (defaults to `num_suits * 5`) and the save-memory flag.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> & deck, HanabiStateConfig config):
  _clues_gained_on_discard_or_stack_finished(config.num_clues_gained_on_discard_or_stack_finished)
  , _score_goal(config.score_goal.value_or(num_suits * 5)), _turn(0), _num_clues(max_num_clues), _num_strikes(0), _weighted_draw_pile_size(deck.size()), _stacks(), _hands(), _draw_pile()
  , _endgame_turns_left(no_endgame), _pace(deck.size() - _score_goal - num_players * (hand_size - 1)), _score(0)
  , _actions_log(), _relative_representation(), _save_memory(config.save_memory), _position_tablebase()
  , _enumerated_states(0)
{
  std::fill(_stacks.begin(), _stacks.end(), starting_card_rank);
  for (const Card & card: deck)
  {
    _draw_pile.push_back({card, 1});
  }
  // Deal the starting hands: each player fills all of their slots, then the turn passes on
  for (player_t player = 0; player < num_players; player++)
  {
    for (std::uint8_t index = 0; index < hand_size; index++)
    {
      draw(index);
    }
    incr_turn();
  }
  // After a full round of dealing we must be back at the first player
  ASSERT(_turn == 0);
  // Prepare card counting (hands are iterated by reference to avoid copying them)
  for (const auto & hand: _hands)
  {
    for (const Card & card: hand)
    {
      _num_copies_left[card] += 1;
    }
  }
  for (const CardMultiplicity & card_mult: _draw_pile)
  {
    _num_copies_left[card_mult.card] += card_mult.multiplicity;
  }
}
/**
 * Spends one clue and passes the turn. Requires at least one clue to be available.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::give_clue()
{
  ASSERT(_num_clues >= clue_t(1));
  // Log first so that revert_clue() can undo this action later
  _actions_log.emplace(ActionType::clue, Cards::unknown, 0);
  --_num_clues;
  incr_turn();
}
/**
 * Advances to the next player (cyclically) and, once the endgame countdown is running,
 * consumes one of the remaining turns.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::incr_turn()
{
  _turn = (_turn + 1) % num_players;
  if (_endgame_turns_left == no_endgame)
  {
    return;
  }
  --_endgame_turns_left;
}
/**
 * Inverse of incr_turn(): steps back to the previous player (cyclically) and, while the
 * endgame countdown is running, gives back one turn.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::decr_turn()
{
  _turn = (_turn + num_players - 1) % num_players;
  if (_endgame_turns_left == no_endgame)
  {
    return;
  }
  ++_endgame_turns_left;
}
/**
 * Debug check: once the relative representation is initialized, no two entries of the
 * draw pile may hold the same card. Works on a sorted copy, the draw pile itself is untouched.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::check_draw_pile_integrity() const
{
  if (not _relative_representation.initialized or _draw_pile.size() < 2)
  {
    return;
  }
  auto sorted_pile = _draw_pile;
  // Order by rank first and by suit second, so that equal cards end up next to each other
  sorted_pile.sort([](CardMultiplicity const & lhs, CardMultiplicity const & rhs) {
    if (lhs.card.rank != rhs.card.rank)
    {
      return lhs.card.rank < rhs.card.rank;
    }
    return lhs.card.suit < rhs.card.suit;
  });
  for (auto current = std::next(sorted_pile.begin()); current != sorted_pile.end(); ++current)
  {
    ASSERT(std::prev(current)->card != current->card);
  }
}
/**
 * A card is playable iff its rank is exactly one below the current stack value of its suit.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_playable(const Hanabi::Card & card) const
{
  const auto next_rank_on_stack = _stacks[card.suit] - 1;
  return card.rank == next_rank_on_stack;
}
/**
 * A card is critical iff exactly one copy of it is left according to `_num_copies_left`.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_critical(Card const & card) const
{
  const auto copies_left = _num_copies_left[card];
  return copies_left == 1;
}
/**
 * Returns the `_enumerated_states` counter, which is incremented once per call
 * of internal_evaluate_state().
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::enumerated_states() const
{
return _enumerated_states;
}
/**
 * A card is trash iff its rank is not below the current stack value of its suit.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_trash(const Hanabi::Card & card) const
{
  const bool still_below_stack = card.rank < _stacks[card.suit];
  return not still_below_stack;
}
/**
 * Plays the card at `index` of the current player's hand.
 * Thin wrapper around play_and_potentially_update() that drops the returned multiplicity.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::play(Hanabi::hand_index_t index)
{
  static_cast<void>(play_and_potentially_update(index));
}
/**
 * Plays the card at `index` of the current player's hand, draws a replacement and passes the turn.
 *
 * A playable card advances its stack and the score; finishing a stack (rank 0) grants clues
 * unless already at the clue cap. Any other card counts as a strike and one copy of it is lost.
 * The action is logged so that revert_play() can undo it.
 *
 * @param index Slot in the current player's hand.
 * @param cycle Forwarded to draw().
 * @return The value returned by draw() for the replacement card.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned long
HanabiState<num_suits, num_players, hand_size>::play_and_potentially_update(hand_index_t index, bool cycle)
{
  CHECK_DRAW_PILE_INTEGRITY;
  ASSERT(index < _hands[_turn].size());
  const Card played_card = _hands[_turn][index];
  bool const strike = !is_playable(played_card);
  // Record whether we are at the clue cap, using the same bound as the gain condition below,
  // so that revert_play() knows whether a clue was actually gained.
  _actions_log.emplace(ActionType::play, played_card, index, _num_clues == max_num_clues, strike);
  if (!strike)
  {
    --_stacks[played_card.suit];
    _score++;
    if (played_card.rank == 0 and _num_clues < max_num_clues)
    {
      // update clues if we played the last played_card of a stack
      _num_clues += _clues_gained_on_discard_or_stack_finished;
    }
  }
  else
  {
    _num_strikes++;
    ASSERT(_num_strikes <= max_num_strikes);
    // A misplayed card is lost
    _num_copies_left[played_card]--;
  }
  const unsigned long multiplicity = draw(index, cycle, !strike);
  incr_turn();
  CHECK_DRAW_PILE_INTEGRITY;
  return multiplicity;
}
/**
 * Discards the card at `index` of the current player's hand.
 * Thin wrapper around discard_and_potentially_update() that drops the returned multiplicity.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::discard(hand_index_t index)
{
  static_cast<void>(discard_and_potentially_update(index));
}
/**
 * Discards the card at `index` of the current player's hand, draws a replacement and passes the turn.
 * Gains clues, costs one pace and removes one copy of the discarded card from the count.
 * Must not be called at the clue cap.
 *
 * @param index Slot in the current player's hand.
 * @param cycle Forwarded to draw().
 * @return The value returned by draw() for the replacement card.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned long
HanabiState<num_suits, num_players, hand_size>::discard_and_potentially_update(hand_index_t index, bool cycle)
{
  CHECK_DRAW_PILE_INTEGRITY;
  ASSERT(index < _hands[_turn].size());
  ASSERT(_num_clues != max_num_clues);
  const Card thrown_away = _hands[_turn][index];
  // Bookkeeping for the discard itself
  _num_copies_left[thrown_away]--;
  _pace--;
  _num_clues += _clues_gained_on_discard_or_stack_finished;
  // Replace the card and log the action for revert_discard()
  const unsigned long multiplicity_of_draw = draw(index, cycle, false);
  _actions_log.emplace(ActionType::discard, thrown_away, index);
  incr_turn();
  CHECK_DRAW_PILE_INTEGRITY;
  return multiplicity_of_draw;
}
/**
 * Finds the first slot of the current player's hand that holds `card`.
 * If `card` is trash, any trash card in hand counts as a match.
 *
 * @return The slot index, or -1 converted to std::uint8_t if there is no match.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint8_t HanabiState<num_suits, num_players, hand_size>::find_card_in_hand(
  const Hanabi::Card & card
) const
{
  const auto & hand = _hands[_turn];
  const bool looking_for_trash = is_trash(card);
  for (size_t position = 0; position < hand.size(); ++position)
  {
    const Card & candidate = hand[position];
    if (candidate == card or (looking_for_trash and is_trash(candidate)))
    {
      return static_cast<std::uint8_t>(position);
    }
  }
  // Sentinel for "not found"
  return static_cast<std::uint8_t>(-1);
}
/**
 * Prints a human-readable summary of the state: stacks with score, clues, strikes, turn,
 * remaining endgame turns (if the countdown is running), the draw pile and all hands.
 * Trash cards in the draw pile are summed up into a single entry; trash in hands is shown as "kt".
 * Critical cards in hands are marked with "!".
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::print(std::ostream & os) const
{
  // First line: global counters
  os << "Stacks: " << _stacks << " (score " << +_score << ")";
  os << ", clues: " << +_num_clues << ", strikes: " << +_num_strikes << ", turn: " << +_turn;
  if (_endgame_turns_left != no_endgame)
  {
    os << ", " << +_endgame_turns_left << " turns left";
  }
  os << std::endl;

  // Second line: draw pile, useful cards listed individually
  os << "Draw pile: ";
  unsigned trash_count = 0;
  for (const auto & entry: _draw_pile)
  {
    if (is_trash(entry.card))
    {
      trash_count += entry.multiplicity;
    }
    else
    {
      os << entry.card;
      if (entry.multiplicity > 1)
      {
        os << " (" << +entry.multiplicity << ")";
      }
      os << ", ";
    }
  }
  if (trash_count > 0)
  {
    os << Cards::trash << " (" << trash_count << ") ";
  }
  os << "[size " << +_weighted_draw_pile_size << "]" << std::endl;

  // Third line: hands of all players
  os << "Hands: ";
  for (const auto & hand: _hands)
  {
    os << "[";
    for (hand_index_t slot = 0; slot < hand.size(); slot++)
    {
      // Cards within a hand are separated by single spaces
      if (slot > 0)
      {
        os << " ";
      }
      const auto & card = hand[slot];
      if (is_trash(card))
      {
        os << "kt";
      }
      else
      {
        os << card;
      }
      if (is_critical(card))
      {
        os << "!";
      }
    }
    os << "] ";
  }
}
/**
 * Replaces the card at `index` in the current player's hand by the front card of the draw pile,
 * if the draw pile is not empty. With an empty draw pile the hand is left untouched.
 *
 * If the relative representation is initialized, the position of the card leaving the hand
 * (played or discarded, depending on `played`) and of the drawn card (now held by `_turn`)
 * is recorded. Cards flagged as `initial_trash` are not tracked.
 *
 * @param index  Slot in the current player's hand.
 * @param cycle  If the drawn entry has multiplicity > 1, the remaining copies are re-inserted
 *               at the back (true) or at the front (false) of the draw pile.
 * @param played Whether the card leaving the hand was successfully played.
 * @return The multiplicity of the drawn draw pile entry before removal of the drawn copy,
 *         or 1 if the draw pile was empty.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned HanabiState<num_suits, num_players, hand_size>::draw(uint8_t index, bool cycle, bool played)
{
ASSERT(index < _hands[_turn].size());
// update card position of the card we are about to discard
if (_relative_representation.initialized)
{
const Card discarded = _hands[_turn][index];
if (!discarded.initial_trash)
{
if (discarded.in_starting_hand)
{
// Cards from the starting hands have exactly one position entry each
ASSERT(_relative_representation.card_positions_hands[discarded.local_index] ==
RelativeRepresentationData::hand);
if (played)
{
_relative_representation.card_positions_hands[discarded.local_index] = RelativeRepresentationData::played;
}
else
{
_relative_representation.card_positions_hands[discarded.local_index] = RelativeRepresentationData::discarded;
}
}
else
{
// Cards from the draw pile store one position per copy: find a copy held by the current player
auto replaced_card_it = std::find(
_relative_representation.card_positions_draw[discarded.local_index].begin(),
_relative_representation.card_positions_draw[discarded.local_index].end(),
_turn
);
ASSERT(replaced_card_it != _relative_representation.card_positions_draw[discarded.local_index].end());
if (played)
{
*replaced_card_it = RelativeRepresentationData::play_stack;
}
else
{
*replaced_card_it = RelativeRepresentationData::discard_pile;
}
// Keep the positions sorted, so that it does not matter which copy is where
std::sort(
_relative_representation.card_positions_draw[discarded.local_index].begin(),
_relative_representation.card_positions_draw[discarded.local_index].end()
);
}
}
}
// draw a new card if the draw pile is not empty
if (!_draw_pile.empty())
{
--_weighted_draw_pile_size;
// Work on a copy of the front entry: the returned multiplicity is the one before drawing
const CardMultiplicity draw = _draw_pile.front();
_draw_pile.pop_front();
ASSERT(draw.multiplicity > 0);
if (draw.multiplicity > 1)
{
// Re-insert the remaining copies, at the back when cycling through the pile
if (cycle)
{
_draw_pile.push_back(draw);
_draw_pile.back().multiplicity--;
}
else
{
_draw_pile.push_front(draw);
_draw_pile.front().multiplicity--;
}
}
if (_relative_representation.initialized)
{
// update card position of the drawn card
if (!draw.card.initial_trash)
{
ASSERT(draw.card.in_starting_hand == false);
// Move one copy that is still in the draw pile into the current player's hand
auto new_card_it = std::find(
_relative_representation.card_positions_draw[draw.card.local_index].begin(),
_relative_representation.card_positions_draw[draw.card.local_index].end(),
RelativeRepresentationData::draw_pile
);
ASSERT(new_card_it != _relative_representation.card_positions_draw[draw.card.local_index].end());
*new_card_it = _turn;
std::sort(
_relative_representation.card_positions_draw[draw.card.local_index].begin(),
_relative_representation.card_positions_draw[draw.card.local_index].end()
);
}
}
_hands[_turn][index] = draw.card;
if (_draw_pile.empty())
{
// Note the +1, since we will immediately decrement this when moving to the next player
_endgame_turns_left = num_players + 1;
}
return draw.multiplicity;
}
return 1;
}
/**
 * Undoes a draw(): puts the card currently at `index` of the current player's hand back into
 * the draw pile (unless no card was drawn because the pile was already empty) and restores
 * `discarded_card` into that hand slot. Expects `_turn` to already point to the player who
 * performed the action.
 *
 * @param index          Hand slot that was replaced.
 * @param discarded_card Card that left the hand in the reverted action.
 * @param cycle          If true, the drawn card is returned to the back of the draw pile,
 *                       otherwise it is merged into an existing entry or put at the front.
 * @param played         Whether the reverted action was a successful play (as opposed to a
 *                       discard or misplay); selects the position the card is taken back from.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_draw(
std::uint8_t index
, Card discarded_card
, bool cycle
, bool played
)
{
// Put the card that is currently in hand back into the draw pile (this does not happen in the last round!)
// The countdown equals num_players + 1 exactly if the reverted action drew the last card.
if (_endgame_turns_left == num_players + 1 || _endgame_turns_left == no_endgame)
{
ASSERT(index < _hands[_turn].size());
const Card & drawn = _hands[_turn][index];
if (cycle)
{
// put the drawn card back into draw pile (at the back), merging with the last entry if it is the same card
if (!_draw_pile.empty() and _draw_pile.back().card.suit == drawn.suit and
_draw_pile.back().card.rank == drawn.rank)
{
_draw_pile.back().multiplicity++;
}
else
{
_draw_pile.push_back({drawn, 1});
}
}
else
{
// We don't know where the card came from (between the card having been removed from the draw pile
// and re-adding it now, the user may have arbitrarily permuted the draw pile implicitly)
// so we have to check if it is already contained in the draw pile somewhere
auto it = std::find_if(_draw_pile.begin(), _draw_pile.end(), [&drawn](CardMultiplicity const & mult) {
return mult.card == drawn;
});
if (it != _draw_pile.end())
{
it->multiplicity++;
}
else
{
_draw_pile.push_front({drawn, 1});
}
}
if (_relative_representation.initialized && !drawn.initial_trash)
{
ASSERT(drawn.in_starting_hand == false);
// Move one copy held by the current player back into the draw pile
auto drawn_card_it = std::find(
_relative_representation.card_positions_draw[drawn.local_index].begin(),
_relative_representation.card_positions_draw[drawn.local_index].end(),
_turn
);
ASSERT(drawn_card_it != _relative_representation.card_positions_draw[drawn.local_index].end());
*drawn_card_it = RelativeRepresentationData::draw_pile;
std::sort(
_relative_representation.card_positions_draw[drawn.local_index].begin(),
_relative_representation.card_positions_draw[drawn.local_index].end()
);
}
_weighted_draw_pile_size++;
// The draw pile is non-empty again, so the endgame countdown is not running
_endgame_turns_left = no_endgame;
}
else
{
// No card was drawn, so the hand slot still holds the card that was played or discarded
ASSERT(_hands[_turn][index] == discarded_card);
}
if (_relative_representation.initialized && !discarded_card.initial_trash)
{
if (discarded_card.in_starting_hand)
{
ASSERT(_relative_representation.card_positions_hands[discarded_card.local_index] !=
RelativeRepresentationData::hand);
_relative_representation.card_positions_hands[discarded_card.local_index] = RelativeRepresentationData::hand;
}
else
{
// Position the card was moved to by draw(), depending on whether it was played
player_t const old_position = [&played] {
if (played)
{
return RelativeRepresentationData::play_stack;
}
else
{
return RelativeRepresentationData::discard_pile;
}
}();
auto hand_card_it = std::find(
_relative_representation.card_positions_draw[discarded_card.local_index].begin(),
_relative_representation.card_positions_draw[discarded_card.local_index].end(),
old_position
);
ASSERT(hand_card_it != _relative_representation.card_positions_draw[discarded_card.local_index].end());
*hand_card_it = _turn;
std::sort(
_relative_representation.card_positions_draw[discarded_card.local_index].begin(),
_relative_representation.card_positions_draw[discarded_card.local_index].end()
);
}
}
_hands[_turn][index] = discarded_card;
}
/**
 * One-time initialization of `_relative_representation`; must not be called twice.
 *
 * Rebuilds the draw pile with one entry per distinct card (all trash merged into a single
 * entry), assigns a `local_index` to every non-trash card of the draw pile and of the
 * starting hands, and sets up the position vectors that draw() / revert_draw() maintain.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::init_backtracking_information()
{
ASSERT(not _relative_representation.initialized);
// Note that this function does not have to be particularly performant, we only call it once to initialize.
// Representative trash card: the first card of the first suit whose stack has already been started.
// The trailing fields are presumably local_index, in_starting_hand and initial_trash (same order as below) - TODO confirm.
// NOTE(review): if no stack has been started, {0, 0} is returned; in that case no card is trash
// by the check below, so the fallback is only used as an index that is never incremented.
const Card trash = [this]() -> Card {
for (suit_t suit = 0; suit < num_suits; suit++)
{
if (_stacks[suit] < starting_card_rank)
{
return {suit, starting_card_rank - 1, 0, false, true};
}
}
return {0, 0};
}();
// Count the copies of each useful card in the draw pile; all trash is counted on the representative
CardArray<num_suits, std::uint8_t> nums_in_draw_pile;
for (const auto [card, multiplicity]: _draw_pile)
{
if (_stacks[card.suit] > card.rank)
{
nums_in_draw_pile[card] += multiplicity;
}
else
{
nums_in_draw_pile[trash] += multiplicity;
}
}
// Prepare draw pile
_draw_pile.clear();
for (suit_t suit = 0; suit < num_suits; suit++)
{
for (rank_t rank = 0; rank < starting_card_rank; rank++)
{
// The local index is the number of useful draw pile cards registered so far.
// NOTE(review): `is_trash(card)` reads `card` inside its own initializer; this relies on suit and
// rank already being initialized at that point - TODO confirm this is well-defined for Card.
Card card{suit, rank, static_cast<uint8_t>(_relative_representation.card_positions_draw.size()), false
, is_trash(card)
};
if (nums_in_draw_pile[card] > 0)
{
_draw_pile.push_back({card, nums_in_draw_pile[card]});
if (!is_trash(card))
{
// One position slot per copy, all of them starting in the draw pile
_relative_representation.card_positions_draw.push_back({});
_relative_representation.card_positions_draw
.back()
.resize(nums_in_draw_pile[card], RelativeRepresentationData::draw_pile);
_relative_representation.good_cards_draw.push_back(card);
}
}
}
}
_relative_representation.initial_draw_pile_size = _weighted_draw_pile_size;
size_t num_useful_cards_in_starting_hands = 0;
// Prepare cards in hands
for (player_t player = 0; player < num_players; player++)
{
for (Card & card: _hands[player])
{
card.initial_trash = is_trash(card);
card.in_starting_hand = true;
// Needed to check for dupes in same hand
// NOTE(review): this vector is re-created for every card, so it is always empty when checked
// and the duplicate branch below is never taken as written. Moving it out of the inner loop
// would change behavior (duplicates would become trash while `_num_copies_left` stays
// unchanged) - confirm the intent before changing.
boost::container::static_vector<Card, hand_size> good_cards_in_hand;
if (!is_trash(card))
{
if (std::count(good_cards_in_hand.begin(), good_cards_in_hand.end(), card) > 0)
{
// This card is already in hand, so just replace the second copy by some trash
card = trash;
}
else
{
// Useful cards of the starting hands get consecutive local indices
card.local_index = num_useful_cards_in_starting_hands;
num_useful_cards_in_starting_hands++;
good_cards_in_hand.push_back(card);
_relative_representation.good_cards_hands.push_back(card);
}
}
}
}
// All useful starting hand cards are initially located in a hand
_relative_representation.card_positions_hands.clear();
_relative_representation.card_positions_hands
.resize(num_useful_cards_in_starting_hands, RelativeRepresentationData::hand);
_relative_representation.initialized = true;
}
/**
 * Undoes the most recent action, which must be a play (successful or a strike).
 *
 * Steps back one turn, takes back a clue gained for finishing a stack (if one was gained),
 * reverts the draw and restores either stack and score (successful play) or the strike counter
 * and the lost copy (misplay).
 *
 * @param cycle Forwarded to revert_draw(); must match the value used when playing.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void
HanabiState<num_suits, num_players, hand_size>::revert_play(bool cycle)
{
  CHECK_DRAW_PILE_INTEGRITY;
  const BacktrackAction last_action = _actions_log.top();
  _actions_log.pop();
  ASSERT(last_action.action_type == ActionType::play);
  // If the play happened at the clue cap, no clue can have been gained or spent since
  ASSERT(!last_action.was_on_8_clues or _num_clues == max_num_clues);
  decr_turn();
  if (last_action.discarded.rank == 0 and not last_action.was_on_8_clues and not last_action.strike)
  {
    _num_clues -= _clues_gained_on_discard_or_stack_finished;
  }
  revert_draw(last_action.index, last_action.discarded, cycle, !last_action.strike);
  if (not last_action.strike)
  {
    _stacks[last_action.discarded.suit]++;
    _score--;
  }
  else
  {
    // If we misplayed, then we lost the card and have to regain it now
    _num_copies_left[last_action.discarded]++;
    // Check before decrementing: a check after the decrement cannot detect
    // an underflow of an unsigned counter.
    ASSERT(_num_strikes > 0);
    _num_strikes--;
  }
  CHECK_DRAW_PILE_INTEGRITY;
}
/**
 * Undoes the most recent action, which must be a discard: steps back one turn, takes back the
 * gained clues, restores pace and copy count and reverts the draw.
 *
 * @param cycle Forwarded to revert_draw(); must match the value used when discarding.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_discard(bool cycle)
{
  CHECK_DRAW_PILE_INTEGRITY;
  const BacktrackAction undone = _actions_log.top();
  _actions_log.pop();
  ASSERT(undone.action_type == ActionType::discard);
  decr_turn();
  _num_clues -= _clues_gained_on_discard_or_stack_finished;
  ASSERT(_num_clues >= clue_t(0));
  // The discarded copy is available again and no longer costs pace
  _num_copies_left[undone.discarded]++;
  _pace++;
  revert_draw(undone.index, undone.discarded, cycle, false);
  CHECK_DRAW_PILE_INTEGRITY;
}
/**
 * Undoes the most recent action, which must be a clue: steps back one turn and
 * returns the spent clue.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_clue()
{
  const BacktrackAction undone = _actions_log.top();
  _actions_log.pop();
  ASSERT(undone.action_type == ActionType::clue);
  decr_turn();
  // Giving the clue back must not exceed the clue cap
  ASSERT(_num_clues < max_num_clues);
  ++_num_clues;
}
/**
 * Undoes the most recent action by dispatching on its type.
 * Action types other than clue, discard and play are ignored.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert()
{
  const ActionType last_type = _actions_log.top().action_type;
  if (last_type == ActionType::clue)
  {
    revert_clue();
  }
  else if (last_type == ActionType::discard)
  {
    revert_discard();
  }
  else if (last_type == ActionType::play)
  {
    revert_play();
  }
}
/**
 * Adds `change` to the number of clues and clamps the result to the range [0, 8].
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::modify_clues(Hanabi::clue_t change)
{
  _num_clues += change;
  if (_num_clues > 8)
  {
    _num_clues = 8;
  }
  else if (_num_clues < 0)
  {
    _num_clues = 0;
  }
}
/**
 * Sets the number of clues to `clues`, which has to lie in the range [0, 8].
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::set_clues(Hanabi::clue_t clues)
{
  const clue_t lowest_allowed(0);
  const clue_t highest_allowed(8);
  ASSERT(lowest_allowed <= clues);
  ASSERT(clues <= highest_allowed);
  _num_clues = clues;
}
/** Returns the index of the player whose turn it is. */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
player_t HanabiState<num_suits, num_players, hand_size>::turn() const
{
return _turn;
}
/** Returns the number of clues currently available. */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
clue_t HanabiState<num_suits, num_players, hand_size>::num_clues() const
{
return _num_clues;
}
/** Returns the number of strikes (misplays) so far. */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned HanabiState<num_suits, num_players, hand_size>::num_strikes() const
{
return _num_strikes;
}
/** Returns the current score, i.e. the number of successfully played cards. */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned HanabiState<num_suits, num_players, hand_size>::score() const
{
return _score;
}
/**
 * Returns copies of all hands as vectors, one per player, in player order.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<std::vector<Card>> HanabiState<num_suits, num_players, hand_size>::hands() const
{
  std::vector<std::vector<Card>> result;
  result.reserve(num_players);
  for (player_t player = 0; player < num_players; player++)
  {
    const auto & hand = _hands[player];
    result.emplace_back(hand.begin(), hand.end());
  }
  return result;
}
/**
 * Returns a copy of the hand of the player whose turn it is.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<Card> HanabiState<num_suits, num_players, hand_size>::cur_hand() const
{
  const auto & current = _hands[_turn];
  return std::vector<Card>(current.begin(), current.end());
}
/**
 * Lists, for playing (`play == true`) or discarding the card at `index`, every card that can be
 * drawn as replacement together with its multiplicity and the looked-up probability of the
 * resulting state (std::nullopt if lookup() has no value for that state).
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
HanabiState<num_suits, num_players, hand_size>::possible_next_states(hand_index_t index, bool play)
{
  std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> outcomes;
  auto record_outcome = [this, &outcomes, index](unsigned multiplicity) {
    const auto probability = lookup();
    // The action already passed the turn, so temporarily step back to read the drawn card
    // from the acting player's hand.
    decr_turn();
    const CardMultiplicity drawn{_hands[_turn][index], multiplicity};
    incr_turn();
    outcomes.emplace_back(drawn, probability);
  };
  do_for_each_potential_draw(index, play, record_outcome);
  return outcomes;
}
/**
 * Lists the actions considered for the current player together with their winning probability.
 *
 * @param evaluate_all If true, every resulting state is evaluated with evaluate_state().
 *                     Otherwise only lookup() is used, and an action whose outcome is not
 *                     fully known is reported with std::nullopt.
 * @param reasonable   If true, misplays are only considered at the clue cap, and useful cards
 *                     are only considered for discarding if there is no trash in hand.
 * @return Pairs of action and (optional) probability; empty if the score goal is reached,
 *         pace is negative or no endgame turns are left.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<std::pair<Action, std::optional<probability_t>>>
HanabiState<num_suits, num_players, hand_size>::get_reasonable_actions(bool evaluate_all, bool reasonable)
{
std::vector<std::pair<Action, std::optional<probability_t>>> actions{};
// Terminal states have no actions
if (_score == _score_goal or _pace < 0 or _endgame_turns_left == 0)
{
return actions;
}
const std::array<Card, hand_size> & hand = _hands[_turn];
// First, check for playable cards
bool played_trash = false;
for (std::uint8_t index = 0; index < hand_size; index++)
{
Card card = hand[index];
// Besides playable cards, misplays of non-critical cards are considered while strikes are left;
// trash is listed at most once.
bool const consider_playing = is_playable(card) or (_num_strikes < max_num_strikes and not is_critical(card) and (not reasonable or _num_clues == max_num_clues) and (not is_trash(card) or not played_trash));
if (consider_playing)
{
if (is_trash(card))
{
// Report all trash as the generic trash card
card = Cards::trash;
played_trash = true;
}
const Action action = {ActionType::play, card};
bool known = true;
probability_t sum_of_probabilities = 0;
// Weight the probability of each resulting state by the multiplicity of the drawn card
do_for_each_potential_draw(index, true, [this, &sum_of_probabilities, &evaluate_all
, &known](const unsigned long multiplicity) {
std::optional<probability_t> prob;
if (evaluate_all) {
prob = evaluate_state();
} else {
prob = lookup();
}
if (prob.has_value())
{
sum_of_probabilities += prob.value() * multiplicity;
}
else
{
known = false;
}
});
if (known)
{
// With an empty draw pile there is exactly one outcome, so normalize by 1
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
const probability_t probability_play = sum_of_probabilities / total_weight;
actions.emplace_back(action, probability_play);
}
else
{
actions.emplace_back(action, std::nullopt);
}
}
}
// Check for discards
if (_pace > 0 and _num_clues < max_num_clues)
{
auto trash_it = std::find_if(hand.begin(), hand.end(),[this](Card const & card){return is_trash(card);});
bool const trash_in_hand = trash_it != hand.end();
bool discarded_trash = false;
// Useful cards already listed as discard, to avoid listing duplicates twice
std::vector<Card> discarded;
for (std::uint8_t index = 0; index < hand_size; index++)
{
Card card = hand[index];
// We only consider discarding if
// - the card is trash, and we have not listed a trash discard yet
// - the card is not critical, and we have not listed the same card yet
bool const consider_discarding = (is_trash(card) and not discarded_trash)
or ((not trash_in_hand or not reasonable) and (not is_trash(card) and not is_critical(card) and std::find(discarded.begin(), discarded.end(), card) == discarded.end()));
if (consider_discarding)
{
if (is_trash(card))
{
// This is useful for normalizing what we discard and therefore also for later printing routines.
// Also note that this card is automatically identified as trash by the is_trash() method,
// so properly handled by other parts of the program if input again.
card = Cards::trash;
discarded_trash = true;
}
else
{
discarded.push_back(card);
}
const Action action = {ActionType::discard, card};
bool known = true;
probability_t sum_of_probabilities = 0;
// Same weighting as for plays above
do_for_each_potential_draw(index, false, [this, &sum_of_probabilities, &evaluate_all
, &known](const unsigned long multiplicity) {
std::optional<probability_t> prob;
if (evaluate_all) {
prob = evaluate_state();
} else {
prob = lookup();
}
if (prob.has_value())
{
sum_of_probabilities += prob.value() * multiplicity;
}
else
{
known = false;
}
});
if (known)
{
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
const probability_t probability_discard = sum_of_probabilities / total_weight;
actions.emplace_back(action, probability_discard);
}
else
{
actions.emplace_back(action, std::nullopt);
}
}
}
}
// Finally, giving a clue: no card is drawn, so there is only a single resulting state
if (_num_clues >= clue_t(1))
{
give_clue();
std::optional<probability_t> prob;
if (evaluate_all) {
prob = evaluate_state();
} else {
prob = lookup();
}
const Action action = {ActionType::clue, Cards::unknown};
actions.emplace_back(action, prob);
revert_clue();
}
return actions;
}
/**
 * Looks up the winning probability of the current state without evaluating anything.
 * The stored count of winnable draw orders is normalized by factorial(draw pile size).
 *
 * @return The probability, or std::nullopt if internal_lookup() has no value for this state.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::optional<probability_t> HanabiState<num_suits, num_players, hand_size>::lookup() const
{
  const std::optional<uint64_t> winnable_orders = internal_lookup();
  if (not winnable_orders.has_value())
  {
    return std::nullopt;
  }
  return probability_t(winnable_orders.value()) / Factorial::factorial(_weighted_draw_pile_size);
}
/**
 * Looks up the number of winnable draw orders (out of factorial(draw pile size)) of the
 * current state without evaluating anything.
 *
 * @return factorial(draw pile size) if the score goal is reached, 0 if the state is lost
 *         (negative pace or no endgame turns left), the stored tablebase value if present,
 *         and std::nullopt otherwise.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::optional<std::uint64_t> HanabiState<num_suits, num_players, hand_size>::internal_lookup() const
{
  // Compare against the configured score goal (not the maximum score), consistent with
  // internal_evaluate_state() and get_reasonable_actions(). Won states are never stored in the
  // tablebase, so they have to be recognized here.
  if (_score == _score_goal)
  {
    return Factorial::factorial(_weighted_draw_pile_size);
  }
  if (_pace < 0 or _endgame_turns_left == 0)
  {
    return 0;
  }
  const auto id = unique_id();
  if (_position_tablebase.count(id) == 1)
  {
    return _position_tablebase.at(id);
  }
  return std::nullopt;
}
/**
 * Makes `card` the next card to be drawn by swapping its draw pile entry with the front entry.
 * If `card` is trash, any trash entry qualifies. The card must be contained in the draw pile.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::rotate_next_draw(const Card & card)
{
  auto matches_requested_card = [&card, this](const CardMultiplicity & entry) {
    if (is_trash(card) and is_trash(entry.card))
    {
      return true;
    }
    return entry.card.rank == card.rank and entry.card.suit == card.suit;
  };
  auto found = std::find_if(_draw_pile.begin(), _draw_pile.end(), matches_requested_card);
  ASSERT(found != _draw_pile.end());
  std::swap(*found, _draw_pile.front());
}
/**
 * Returns the type of the most recently logged action. The log must not be empty.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
ActionType HanabiState<num_suits, num_players, hand_size>::last_action_type() const
{
  ASSERT(not _actions_log.empty());
  const BacktrackAction & most_recent = _actions_log.top();
  return most_recent.action_type;
}
/**
 * Tells whether the current state should be stored: always, unless memory saving is enabled,
 * in which case only states with an even draw pile size qualify.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::save_state_to_map()
{
  return not _save_memory or _weighted_draw_pile_size % 2 == 0;
}
/**
 * Evaluates the current state and returns its winning probability, i.e. the number of winnable
 * draw orders divided by factorial(draw pile size).
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
probability_t HanabiState<num_suits, num_players, hand_size>::evaluate_state()
{
  const std::uint64_t winnable_orders = internal_evaluate_state();
  const auto total_orders = Factorial::factorial(_weighted_draw_pile_size);
  return probability_t(winnable_orders) / total_orders;
}
/**
 * Recursively evaluates the current state.
 *
 * Tries plays first, then a discard (or a misplay in its place when at the clue cap), then a
 * clue, and keeps the best result. Stops early as soon as an action wins for every draw order.
 * Requires the relative representation to be initialized. The state is restored before returning.
 *
 * @return The number of draw orders (out of factorial(draw pile size)) that can be won from here.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::internal_evaluate_state()
{
ASSERT(_relative_representation.initialized);
_enumerated_states++;
const unsigned long id_of_state = unique_id();
// Won: every remaining draw order counts as a win
if (_score == _score_goal)
{
return Factorial::factorial(_weighted_draw_pile_size);
}
// Lost: out of pace or out of turns
if (_pace < 0 || _endgame_turns_left == 0)
{
return 0;
}
#ifndef GAME_STATE_NO_TABLEBASE_LOOKUP
if (_position_tablebase.count(id_of_state) == 1) {
return _position_tablebase[id_of_state];
}
#endif
// TODO: Have some endgame analysis here?
std::uint64_t best_probability = 0;
const std::array<Card, hand_size> & hand = _hands[_turn];
// First, check for playables
bool played_trash = false;
for (std::uint8_t index = 0; index < hand_size; index++)
{
// At the clue cap, misplaying a non-critical card is also considered (trash only once)
if (is_playable(hand[index]) or (_num_clues == max_num_clues and _num_strikes < max_num_strikes and not is_critical(hand[index]) and (not is_trash(hand[index]) or not played_trash)))
{
if (is_trash(hand[index])) {
played_trash = true;
}
std::uint64_t const probability_play = check_play_or_discard(index, true);
best_probability = std::max(best_probability, probability_play);
// Cannot do better than winning every draw order
if (best_probability == Factorial::factorial(_weighted_draw_pile_size))
{
update_tablebase(id_of_state, best_probability);
return best_probability;
};
}
}
// Check for discards now
if (_pace > 0 and (_num_clues < max_num_clues or _num_strikes < max_num_strikes))
{
// At the clue cap the chosen card is played (i.e. misplayed) instead of discarded
bool const play_card_instead_of_discarding = _num_clues == max_num_clues;
// This will hold the index of trash to discard
std::uint8_t const invalid_index = std::numeric_limits<std::uint8_t>::max();
std::uint8_t discard_index = invalid_index;
for (hand_index_t index = 0; index < hand_size; index++)
{
if (is_trash(hand[index]))
{
discard_index = index;
// All discards are equivalent, do not continue searching for different trash
break;
}
}
// If no trivial trash found, check for duplicates next
if (discard_index == invalid_index) {
for (std::uint8_t index = 0; index < hand_size; index++) {
Card const card = _hands[_turn][index];
// Only look at later slots, so that each pair is checked once
auto it = std::find_if(_hands[_turn].begin() + index + 1, _hands[_turn].end(), [&card](Card const & card_in_hand) {
return card_in_hand == card;
});
if (it != _hands[_turn].end()) {
// found a duplicate to discard
discard_index = index;
// Since we are discarding essentially trash, we do not have to consider further actions
break;
}
}
}
// Discard if we found trash now
if (discard_index != invalid_index) {
std::uint64_t const probability_discard = check_play_or_discard(discard_index, play_card_instead_of_discarding);
best_probability = std::max(best_probability, probability_discard);
if (best_probability == Factorial::factorial(_weighted_draw_pile_size))
{
update_tablebase(id_of_state, best_probability);
return best_probability;
};
} else {
// If we reach this state, then there are no dupes in hand, so we need to check if we want to
// sacrifice cards in hand
for(hand_index_t index = 0; index < hand_size; ++index) {
if(!is_critical(hand[index])) {
std::uint64_t const probability_sacrifice = check_play_or_discard(index, play_card_instead_of_discarding);
best_probability = std::max(best_probability, probability_sacrifice);
if (best_probability == Factorial::factorial(_weighted_draw_pile_size))
{
update_tablebase(id_of_state, best_probability);
return best_probability;
};
}
}
}
}
// Last option is to stall
if (_num_clues >= clue_t(1))
{
give_clue();
const std::uint64_t probability_stall = internal_evaluate_state();
revert_clue();
best_probability = std::max(best_probability, probability_stall);
}
update_tablebase(id_of_state, best_probability);
return best_probability;
}
/**
 * Evaluates playing or discarding the card at the given hand position.
 *
 * The action is carried out via do_for_each_potential_draw. For every invocation of the callback,
 * the resulting state is evaluated with internal_evaluate_state() and the result is added to a
 * running total, multiplied by the multiplicity passed to the callback.
 *
 * @param index Hand position of the card to act on
 * @param play  If true the card is played, otherwise it is discarded
 * @return Sum over all callback invocations of (evaluation of resulting state) * multiplicity
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::check_play_or_discard(hand_index_t index, bool play)
{
  std::uint64_t weighted_total = 0;

  // Called once per potential draw while the action is applied to the state
  auto const add_weighted_evaluation = [this, &weighted_total](const unsigned long multiplicity) {
    weighted_total += internal_evaluate_state() * multiplicity;
  };

  do_for_each_potential_draw(index, play, add_weighted_evaluation);
  return weighted_total;
}
/**
 * Applies a play or discard of the card at `index`, calls `f` on the resulting state and reverts
 * the action again.
 *
 * If the draw pile is empty, this happens exactly once and `f` is called with multiplicity 1.
 * Otherwise, it happens `_draw_pile.size()` times and `f` receives the multiplicity returned by
 * the respective action. The draw pile is asserted to be unchanged once the function is done.
 *
 * @param index Hand position of the card to play or discard
 * @param play  Selects between playing (true) and discarding (false)
 * @param f     Callable taking the multiplicity; invoked while the action is applied
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
template<class Function>
void
HanabiState<num_suits, num_players, hand_size>::do_for_each_potential_draw(hand_index_t index, bool play, Function f)
{
  // Snapshot of the draw pile, only used for the consistency check at the very end
  auto draw_pile_before = _draw_pile;

  auto apply = [this, index, play]() {
    return play ? play_and_potentially_update(index, true)
                : discard_and_potentially_update(index, true);
  };

  auto undo = [this, play]() {
    if (!play)
    {
      revert_discard(true);
      return;
    }
    revert_play(true);
  };

  if (!_draw_pile.empty())
  {
    unsigned multiplicities_seen = 0;
    for (size_t draw = 0; draw < _draw_pile.size(); ++draw)
    {
      const unsigned long multiplicity = apply();
      multiplicities_seen += multiplicity;
      f(multiplicity);
      undo();
    }
    // All multiplicities together have to account for the full weighted draw pile
    ASSERT(multiplicities_seen == _weighted_draw_pile_size);
  }
  else
  {
    // Nothing to draw: there is a single successor state
    apply();
    f(1);
    undo();
  }

  ASSERT(_draw_pile == draw_pile_before);
}
/**
 * Encodes the current state into a single integer.
 *
 * The id is built up like a number with mixed radices, where the following digits are appended in order:
 *  - the number of strikes
 *  - for each tracked copy of a card that started in the draw pile: its position (radix num_players + 3)
 *  - for each tracked card that started in a hand: its position (radix 3)
 *  - twice the number of clues (radix 2 * max_num_clues + 1)
 *  - draw pile size resp. number of extra turns left (radix initial draw pile size + num_players)
 *  - the player whose turn it is (radix num_players)
 *
 * Positions of cards that are trash are normalized to the discard pile / discarded position.
 *
 * @return The encoded id of this state
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::unique_id() const
{
  using position_underlying_t = std::underlying_type_t<typename RelativeRepresentationData::CardPosition>;

  // Encode strikes first, since they will often be zero.
  // We accumulate in the return type on purpose: unsigned long is only guaranteed to have 32 bits
  // (and has exactly 32 bits on LLP64 platforms), which is too small for the ids computed here.
  std::uint64_t id = _num_strikes;

  // encode all positions of cards that started in draw pile
  ASSERT(_relative_representation.card_positions_draw.size() == _relative_representation.good_cards_draw.size());
  for (size_t i = 0; i < _relative_representation.card_positions_draw.size(); i++)
  {
    for (player_t player: _relative_representation.card_positions_draw[i])
    {
      id *= num_players + 3;
      // We normalize here: If a card is already played, then the positions of its other copies
      // do not matter, so we can just pretend that they are all in the trash already.
      // The resulting states will be equivalent.
      if (!is_trash(_relative_representation.good_cards_draw[i]))
      {
        id += player;
      }
      else
      {
        id += RelativeRepresentationData::discard_pile;
      }
    }
  }

  // encode positions of cards that started in hands
  ASSERT(_relative_representation.card_positions_hands.size() == _relative_representation.good_cards_hands.size());
  for (size_t i = 0; i < _relative_representation.card_positions_hands.size(); i++)
  {
    id *= 3;
    // We have to normalize here again and pretend that cards already played are all discarded.
    // Note that implicitly, this means that when we lose the last copy of a good card, this encoding pretends that
    // the card has been played already.
    // However, since we only ever consider actions that do not lose the last copy of a card, this is not a problem
    // (unless our base state was already lacking cards, in which case the card is never considered played in any state)
    if (is_trash(_relative_representation.good_cards_hands[i]))
    {
      id += static_cast<position_underlying_t>(RelativeRepresentationData::CardPosition::discarded);
    }
    else
    {
      id += static_cast<position_underlying_t>(_relative_representation.card_positions_hands[i]);
    }
  }

  // encode number of clues; they are doubled so that the encoded value is integral (checked below)
  clue_t const scaled_clues = clue_t(2) * _num_clues;
  ASSERT(scaled_clues.denominator() == 1);
  id *= (max_num_clues * clue_t(2)).numerator() + 1;
  id += scaled_clues.numerator();

  // we can encode draw pile size and extra turn in one metric, since we only have extra turns if draw pile is empty
  const std::uint8_t draw_pile_size_and_extra_turns = [this]() -> std::uint8_t {
    if (_endgame_turns_left == no_endgame)
    {
      return _weighted_draw_pile_size + num_players;
    }
    else
    {
      return _endgame_turns_left;
    }
  }();
  // NOTE(review): if _weighted_draw_pile_size can be equal to initial_draw_pile_size, then the digit added
  // below equals the radix used here. Please confirm that this cannot make two reachable states share an id.
  id *= _relative_representation.initial_draw_pile_size + num_players;
  id += draw_pile_size_and_extra_turns;

  id *= num_players;
  id += _turn;

  // The id is unique now, since for all relevant cards, we know their position (including if they are played),
  // the number of clues, the draw pile size and whose turn it is.
  // This already uniquely determines the current players position, assuming that we never discard good cards
  // (and only play them)
  return id;
}
/**
 * Lists the individual values that unique_id() combines into the id of the current state, without combining them.
 *
 * The first vector contains, in this order: the number of strikes, one position for each tracked copy of a card
 * that started in the draw pile, one position for each tracked card that started in a hand, twice the number of
 * clues, the combined draw pile size / extra turns value and the player whose turn it is.
 * The second vector contains, for each position entry of a card that started in the draw pile, the
 * corresponding card.
 *
 * @return Pair of (id parts, cards belonging to the draw pile position entries)
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::pair<std::vector<std::uint64_t>, std::vector<Card>>
HanabiState<num_suits, num_players, hand_size>::dump_unique_id_parts() const
{
  using position_underlying_t = std::underlying_type_t<typename RelativeRepresentationData::CardPosition>;

  // Both vectors are filled directly inside the returned pair, so that they are not copied upon return.
  std::pair<std::vector<std::uint64_t>, std::vector<Card>> result;
  std::vector<std::uint64_t> & ret = result.first;
  std::vector<Card> & cards = result.second;

  // strikes come first
  ret.push_back(_num_strikes);

  // positions of all cards that started in draw pile
  ASSERT(_relative_representation.card_positions_draw.size() == _relative_representation.good_cards_draw.size());
  for (size_t i = 0; i < _relative_representation.card_positions_draw.size(); i++)
  {
    for (player_t player: _relative_representation.card_positions_draw[i])
    {
      // We normalize here: If a card is already played, then the positions of its other copies
      // do not matter, so we can just pretend that they are all in the trash already.
      // The resulting states will be equivalent.
      if (!is_trash(_relative_representation.good_cards_draw[i]))
      {
        ret.push_back(player);
      }
      else
      {
        ret.push_back(RelativeRepresentationData::discard_pile);
      }
      cards.push_back(_relative_representation.good_cards_draw[i]);
    }
  }

  // positions of cards that started in hands
  ASSERT(_relative_representation.card_positions_hands.size() == _relative_representation.good_cards_hands.size());
  for (size_t i = 0; i < _relative_representation.card_positions_hands.size(); i++)
  {
    // We have to normalize here again and pretend that cards already played are all discarded.
    // Note that implicitly, this means that when we lose the last copy of a good card, this encoding pretends that
    // the card has been played already.
    // However, since we only ever consider actions that do not lose the last copy of a card, this is not a problem
    // (unless our base state was already lacking cards, in which case the card is never considered played in any state)
    if (is_trash(_relative_representation.good_cards_hands[i]))
    {
      ret.push_back(static_cast<position_underlying_t>(RelativeRepresentationData::CardPosition::discarded));
    }
    else
    {
      ret.push_back(static_cast<position_underlying_t>(_relative_representation.card_positions_hands[i]));
    }
  }

  // number of clues; they are doubled so that the value is integral (checked below)
  clue_t const scaled_clues = clue_t(2) * _num_clues;
  ASSERT(scaled_clues.denominator() == 1);
  ret.push_back(scaled_clues.numerator());

  // draw pile size and extra turns share one entry, since we only have extra turns if draw pile is empty
  const std::uint8_t draw_pile_size_and_extra_turns = [this]() -> std::uint8_t {
    if (_endgame_turns_left == no_endgame)
    {
      return _weighted_draw_pile_size + num_players;
    }
    else
    {
      return _endgame_turns_left;
    }
  }();
  ret.push_back(draw_pile_size_and_extra_turns);

  // finally, whose turn it is
  ret.push_back(_turn);

  return result;
}
/**
 * @return Read-only reference to the map that update_tablebase() writes to, keyed by state id
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
const map_type<unsigned long, std::uint64_t> &
HanabiState<num_suits, num_players, hand_size>::position_tablebase() const
{
  return this->_position_tablebase;
}
/**
 * @return The weighted draw pile size currently stored in this state
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
size_t HanabiState<num_suits, num_players, hand_size>::draw_pile_size() const
{
  return static_cast<size_t>(_weighted_draw_pile_size);
}
/**
 * @return The value of the `initialized` flag of the relative representation
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_relative_state_initialized() const
{
  bool const initialized = _relative_representation.initialized;
  return initialized;
}
/**
 * Stores the given value for the state with the given id in the position tablebase.
 *
 * If the tablebase already has an entry for this id, it is asserted to agree with the new value.
 * The entry is only written if save_state_to_map() returns true.
 *
 * @param id          Id of the state
 * @param probability Value to store for this state
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::update_tablebase(
  unsigned long id, std::uint64_t probability
)
{
  // When compiling with -DDUMP_STATES, details on the state with the id hard-coded below are printed
  // for analysis purposes.
#ifdef DUMP_STATES
  if (id == 87476369689)
  {
    std::cout << *this << std::endl;
    const auto [id_parts, cards] = dump_unique_id_parts();
    std::cout << "id is: " << id << ", id parts are: ";
    for (auto const & id_part: id_parts)
    {
      std::cout << id_part << " ";
    }
    std::cout << ", encoded cards are ";
    for (auto const & encoded_card: cards)
    {
      std::cout << encoded_card << " ";
    }
    std::cout << ", probability is ";
    print_probability(std::cout, probability);
    std::cout << "\n" << std::endl;
  }
#endif

  // A state that was stored before must not change its value
  if (_position_tablebase.find(id) != _position_tablebase.end())
  {
    ASSERT(_position_tablebase[id] == probability);
  }

  if (!save_state_to_map())
  {
    return;
  }
  _position_tablebase[id] = probability;
}
} // namespace Hanabi