Endgame-Analyzer/include/game_state.hpp

1424 lines
47 KiB
C++
Raw Permalink Normal View History

2023-08-04 16:28:41 +02:00
#include <algorithm>
2023-11-11 11:44:37 +01:00
#include <iostream>
2023-08-12 19:43:22 +02:00
2023-08-06 15:02:50 +02:00
#include "myassert.h"
#include "game_state.h"
2023-08-04 16:28:41 +02:00
/**
 * Compiling with -DINTEGRITY_CHECK_ON enables exhaustive draw pile integrity checks while backtracking.
 * These checks significantly slow down performance, so they are deactivated by default.
 *
 * With INTEGRITY_CHECK_ON defined, CHECK_DRAW_PILE_INTEGRITY expands to a call of
 * check_draw_pile_integrity(); otherwise it expands to nothing, so that the statement
 * `CHECK_DRAW_PILE_INTEGRITY;` is just an empty statement.
 */
#ifdef INTEGRITY_CHECK_ON
#define CHECK_DRAW_PILE_INTEGRITY check_draw_pile_integrity()
#else
#define CHECK_DRAW_PILE_INTEGRITY
#endif
#include "factorial.h"
2023-11-16 16:20:04 +01:00
namespace Hanabi
{
2023-08-05 11:55:46 +02:00
2023-11-16 16:20:04 +01:00
/**
 * Writes the stacks as a comma-separated list. Each entry consists of the suit initial
 * followed by starting_card_rank minus the value stored for that stack.
 *
 * @param os     Stream to write to
 * @param stacks Stacks to print
 * @return The stream that was passed in
 */
template<size_t num_suits>
std::ostream & operator<<(std::ostream & os, const Stacks<num_suits> & stacks)
{
  const size_t count = stacks.size();
  for (size_t pos = 0; pos != count; ++pos)
  {
    // Separator goes in front of every entry but the first
    if (pos != 0)
    {
      os << ", ";
    }
    os << suit_initials[pos] << starting_card_rank - stacks[pos];
  }
  return os;
}
2023-08-05 12:19:34 +02:00
2023-11-16 16:20:04 +01:00
/**
 * Overwrites every entry (all suits, all ranks below starting_card_rank) with the given value.
 *
 * @param val Value stored into each entry
 */
template<suit_t num_suits, typename T>
void CardArray<num_suits, T>::fill(T val)
{
  for (size_t suit_idx = 0; suit_idx < num_suits; ++suit_idx)
  {
    for (rank_t rank_idx = 0; rank_idx < starting_card_rank; ++rank_idx)
    {
      _array[suit_idx][rank_idx] = val;
    }
  }
}
/**
 * Constructs the array with every entry set to default_val.
 *
 * @param default_val Initial value of all entries
 */
template<suit_t num_suits, typename T>
CardArray<num_suits, T>::CardArray(T default_val)
{
  this->fill(default_val);
}
/**
 * Read-only access to the entry belonging to a card; the entry is addressed by the card's suit and rank.
 *
 * @param card Card whose entry is requested
 * @return Const reference to the stored entry
 */
template<suit_t num_suits, typename T>
const T & CardArray<num_suits, T>::operator[](const Card & card) const
{
  const auto & entries_of_suit = _array[card.suit];
  return entries_of_suit[card.rank];
}
2023-11-16 16:20:04 +01:00
/**
 * Mutable access to the entry belonging to a card; the entry is addressed by the card's suit and rank.
 *
 * @param card Card whose entry is requested
 * @return Reference to the stored entry
 */
template<suit_t num_suits, typename T>
T & CardArray<num_suits, T>::operator[](const Card & card)
{
  auto & entries_of_suit = _array[card.suit];
  return entries_of_suit[card.rank];
}
2023-11-16 16:20:04 +01:00
/**
 * Stores everything that is logged about one performed action.
 *
 * @param action_type         Kind of the action
 * @param discarded_or_played Card affected by the action (stored in the member `discarded`)
 * @param index               Hand index the action referred to
 * @param was_on_8_clues      Flag recorded by the caller at the time of the action
 * @param strike              Whether the action was a misplay
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
HanabiState<num_suits, num_players, hand_size>::BacktrackAction::BacktrackAction(
  Hanabi::ActionType action_type
  , Hanabi::Card discarded_or_played
  , Hanabi::hand_index_t index
  , bool was_on_8_clues
  , bool strike
)
  : action_type(action_type)
  , discarded(discarded_or_played)
  , index(index)
  , was_on_8_clues(was_on_8_clues)
  , strike(strike)
{
}
/**
 * Builds a game state from a deck.
 *
 * Every card of the deck is put into the draw pile with multiplicity 1 (in deck order).
 * Afterwards, each player in turn draws hand_size cards. Finally, the number of copies of each
 * card that are in some hand or in the draw pile is counted into _num_copies_left.
 *
 * @param deck   Cards forming the initial draw pile, front of the vector is drawn first
 * @param config Supplies the clues gained per discard / finished stack, the optional score goal
 *               (defaults to num_suits * 5) and the save_memory flag
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> & deck, HanabiStateConfig config):
  _clues_gained_on_discard_or_stack_finished(config.num_clues_gained_on_discard_or_stack_finished)
  , _score_goal(config.score_goal.value_or(num_suits * 5)), _turn(0), _num_clues(max_num_clues), _num_strikes(0), _weighted_draw_pile_size(deck.size()), _stacks(), _hands(), _draw_pile()
  , _endgame_turns_left(no_endgame), _pace(deck.size() - _score_goal - num_players * (hand_size - 1)), _score(0)
  , _actions_log(), _relative_representation(), _save_memory(config.save_memory), _position_tablebase()
  , _enumerated_states(0)
{
  std::fill(_stacks.begin(), _stacks.end(), starting_card_rank);
  for (const Card & card: deck)
  {
    _draw_pile.push_back({card, 1});
  }

  // Deal the starting hands: draw() fills the hand of the current player, incr_turn() moves on.
  for (player_t player = 0; player < num_players; player++)
  {
    for (std::uint8_t index = 0; index < hand_size; index++)
    {
      draw(index);
    }
    incr_turn();
  }
  // After every player has drawn once, it is the first player's turn again
  ASSERT(_turn == 0);

  // Prepare card counting: count all copies that are in hands or still in the draw pile
  for (auto const & hand: _hands)
  {
    for (Card const & card: hand)
    {
      _num_copies_left[card] += 1;
    }
  }
  for (CardMultiplicity const & card_mult: _draw_pile)
  {
    _num_copies_left[card_mult.card] += card_mult.multiplicity;
  }
}
/**
 * Performs a clue action: logs it, spends one clue and passes the turn to the next player.
 * At least one full clue must be available.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::give_clue()
{
  ASSERT(_num_clues >= clue_t(1));

  _actions_log.emplace(ActionType::clue, Cards::unknown, 0);
  --_num_clues;
  incr_turn();
}
/**
 * Advances the turn to the next player (wrapping around after the last one).
 * If an endgame countdown is running, i.e. _endgame_turns_left is not no_endgame, it is decremented.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::incr_turn()
{
  const bool countdown_running = _endgame_turns_left != no_endgame;
  if (countdown_running)
  {
    --_endgame_turns_left;
  }
  _turn = (_turn + 1) % num_players;
}
2023-08-05 11:55:46 +02:00
2023-11-16 16:20:04 +01:00
/**
 * Moves the turn back to the previous player (wrapping around before the first one).
 * If an endgame countdown is running, i.e. _endgame_turns_left is not no_endgame, it is incremented.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::decr_turn()
{
  const bool countdown_running = _endgame_turns_left != no_endgame;
  if (countdown_running)
  {
    ++_endgame_turns_left;
  }
  // Adding num_players first keeps the left operand of % from going below zero
  _turn = (_turn + num_players - 1) % num_players;
}
2023-08-06 11:54:57 +02:00
2023-11-16 16:20:04 +01:00
/**
 * Debug check: once the relative representation has been initialized, no card may appear in
 * more than one entry of the draw pile. Does nothing before initialization or for draw piles
 * with fewer than two entries.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::check_draw_pile_integrity() const
{
  if (not _relative_representation.initialized or _draw_pile.size() < 2)
  {
    return;
  }

  // Work on a sorted copy (by rank, then by suit), so that equal cards end up next to each other
  auto sorted_pile = _draw_pile;
  sorted_pile.sort([](CardMultiplicity const & lhs, CardMultiplicity const & rhs) {
    if (lhs.card.rank != rhs.card.rank)
    {
      return lhs.card.rank < rhs.card.rank;
    }
    return lhs.card.suit < rhs.card.suit;
  });

  auto previous = sorted_pile.begin();
  for (auto current = std::next(previous); current != sorted_pile.end(); ++current, ++previous)
  {
    ASSERT(previous->card != current->card);
  }
}
/**
 * @param card Card to check
 * @return True iff the rank of the card is exactly one below the value stored for the stack of its suit
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_playable(const Hanabi::Card & card) const
{
  const auto next_rank_on_stack = _stacks[card.suit] - 1;
  return card.rank == next_rank_on_stack;
}
2024-01-12 14:27:16 +01:00
/**
 * @param card Card to check
 * @return True iff exactly one copy of this card is left according to _num_copies_left
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_critical(Card const & card) const
{
  const auto copies_left = _num_copies_left[card];
  return copies_left == 1;
}
2023-11-16 16:20:04 +01:00
/**
 * @return Current value of the _enumerated_states counter
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::enumerated_states() const
{
  return this->_enumerated_states;
}
/**
 * @param card Card to check
 * @return True iff the rank of the card is not below the value stored for the stack of its suit
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_trash(const Hanabi::Card & card) const
{
  const auto stack_value = _stacks[card.suit];
  return card.rank >= stack_value;
}
/**
 * Plays the card at the given hand index of the current player.
 * Forwards to play_and_potentially_update() without cycling the draw pile and with strikeouts allowed;
 * the returned multiplicity is dropped.
 *
 * @param index Hand index of the card to play
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::play(Hanabi::hand_index_t index)
{
  constexpr bool cycle = false;
  constexpr bool allow_strikeout = true;
  play_and_potentially_update(index, cycle, allow_strikeout);
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned long
2024-03-15 14:57:38 +01:00
HanabiState<num_suits, num_players, hand_size>::play_and_potentially_update(hand_index_t index, bool cycle, bool allow_strikeout)
2023-11-16 16:20:04 +01:00
{
CHECK_DRAW_PILE_INTEGRITY;
2023-11-16 16:20:04 +01:00
ASSERT(index < _hands[_turn].size());
const Card played_card = _hands[_turn][index];
bool const strike = !is_playable(played_card);
_actions_log.emplace(ActionType::play, played_card, index, _num_clues == 8, strike);
if (!strike)
2023-11-16 16:20:04 +01:00
{
--_stacks[played_card.suit];
_score++;
if (played_card.rank == 0 and _num_clues < max_num_clues)
{
// update clues if we played the last played_card of a stack
_num_clues += _clues_gained_on_discard_or_stack_finished;
2023-11-11 11:44:37 +01:00
}
2024-01-12 14:27:16 +01:00
} else {
2024-01-16 17:19:42 +01:00
_num_strikes++;
2024-03-15 14:57:38 +01:00
ASSERT(_num_strikes <= max_num_strikes or allow_strikeout);
2024-01-12 14:27:16 +01:00
_num_copies_left[played_card]--;
2023-11-11 11:44:37 +01:00
}
2023-11-16 16:20:04 +01:00
const unsigned long multiplicity = draw(index, cycle, !strike);
incr_turn();
CHECK_DRAW_PILE_INTEGRITY;
2023-11-16 16:20:04 +01:00
return multiplicity;
}
/**
 * Discards the card at the given hand index of the current player.
 * Forwards to discard_and_potentially_update(); the returned multiplicity is dropped.
 *
 * @param index Hand index of the card to discard
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::discard(hand_index_t index)
{
  (void) discard_and_potentially_update(index);
}
/**
 * Discards the card at the given hand index of the current player, draws a replacement and passes the turn.
 *
 * Gains clues, reduces the pace by one and removes the card from the remaining copies.
 * The action is logged so that revert_discard() can undo it.
 * The clue count must not be at max_num_clues.
 *
 * @param index Hand index of the card to discard
 * @param cycle Forwarded to draw()
 * @return Value returned by draw()
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned long
HanabiState<num_suits, num_players, hand_size>::discard_and_potentially_update(hand_index_t index, bool cycle)
{
  CHECK_DRAW_PILE_INTEGRITY;

  ASSERT(index < _hands[_turn].size());
  ASSERT(_num_clues != max_num_clues);

  const Card discarded_card = _hands[_turn][index];
  _actions_log.emplace(ActionType::discard, discarded_card, index);

  // Bookkeeping for the discard itself
  _num_copies_left[discarded_card]--;
  _pace--;
  _num_clues += _clues_gained_on_discard_or_stack_finished;

  const unsigned long multiplicity = draw(index, cycle, false);
  incr_turn();

  CHECK_DRAW_PILE_INTEGRITY;
  return multiplicity;
}
/**
 * Searches the hand of the current player for a card.
 * A hand card matches if it compares equal to the given card, or if both it and the given card are trash.
 *
 * @param card Card to look for
 * @return Index of the first matching hand card, or -1 converted to std::uint8_t if there is none
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint8_t HanabiState<num_suits, num_players, hand_size>::find_card_in_hand(
  const Hanabi::Card & card
) const
{
  const auto & hand = _hands[_turn];
  const auto matches = [&card, this](Card const & card_in_hand) {
    if (card_in_hand == card)
    {
      return true;
    }
    return is_trash(card) and is_trash(card_in_hand);
  };

  const auto found = std::find_if(hand.begin(), hand.end(), matches);
  if (found == hand.end())
  {
    return -1;
  }
  return std::distance(hand.begin(), found);
}
/**
 * Writes a human-readable description of the state to the stream.
 *
 * Output consists of three parts: a line with stacks, score, clues, strikes, turn and (if the
 * endgame countdown is running) the remaining turns; a line with the draw pile, where all trash
 * entries are summed up into a single entry; and the hands of all players, where trash cards are
 * printed as "kt" and critical cards are followed by "!".
 *
 * @param os Stream to write to
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::print(std::ostream & os) const
{
  // General information
  os << "Stacks: " << _stacks << " (score " << +_score << ")"
     << ", clues: " << +_num_clues << ", strikes: " << +_num_strikes << ", turn: " << +_turn;
  if (_endgame_turns_left != no_endgame)
  {
    os << ", " << +_endgame_turns_left << " turns left";
  }
  os << std::endl;

  // Draw pile: non-trash entries are listed one by one, trash is only counted
  os << "Draw pile: ";
  unsigned trash_count = 0;
  for (const auto & [pile_card, copies]: _draw_pile)
  {
    if (is_trash(pile_card))
    {
      trash_count += copies;
    }
    else
    {
      os << pile_card;
      if (copies > 1)
      {
        os << " (" << +copies << ")";
      }
      os << ", ";
    }
  }
  if (trash_count > 0)
  {
    os << Cards::trash << " (" << trash_count << ") ";
  }
  os << "[size " << +_weighted_draw_pile_size << "]" << std::endl;

  // Hands
  os << "Hands: ";
  for (const auto & hand: _hands)
  {
    os << "[";
    for (hand_index_t pos = 0; pos < hand.size(); pos++)
    {
      const Card & hand_card = hand[pos];
      if (is_trash(hand_card))
      {
        os << "kt";
      }
      else
      {
        os << hand_card;
      }
      if (is_critical(hand_card))
      {
        os << "!";
      }
      // Separate cards by a space, but do not print one after the last card
      if (pos < hand.size() - 1)
      {
        os << " ";
      }
    }
    os << "] ";
  }
}
2023-08-07 00:06:50 +02:00
2023-11-16 16:20:04 +01:00
/**
 * Replaces the card at the given hand index of the current player by the front card of the draw pile.
 *
 * If the relative representation is initialized, the position bookkeeping of both the card leaving
 * the hand and the card entering it is updated (cards flagged as initial trash are not tracked).
 * If the draw pile is empty, the hand is left untouched.
 * When the last card is drawn, the endgame countdown is started.
 *
 * @param index  Hand index to draw into
 * @param cycle  If the front entry has further copies, they are re-inserted at the back of the
 *               draw pile if true, and at the front otherwise
 * @param played Whether the card leaving the hand was played (true) or discarded (false)
 * @return Multiplicity the drawn entry had before drawing, or 1 if the draw pile was empty
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned HanabiState<num_suits, num_players, hand_size>::draw(uint8_t index, bool cycle, bool played)
{
  ASSERT(index < _hands[_turn].size());

  // update card position of the card that is about to leave the hand
  if (_relative_representation.initialized)
  {
    const Card leaving = _hands[_turn][index];
    if (!leaving.initial_trash)
    {
      if (leaving.in_starting_hand)
      {
        ASSERT(_relative_representation.card_positions_hands[leaving.local_index] ==
               RelativeRepresentationData::hand);
        _relative_representation.card_positions_hands[leaving.local_index] =
            played ? RelativeRepresentationData::played : RelativeRepresentationData::discarded;
      }
      else
      {
        auto & positions = _relative_representation.card_positions_draw[leaving.local_index];
        const auto slot = std::find(positions.begin(), positions.end(), _turn);
        ASSERT(slot != positions.end());
        *slot = played ? RelativeRepresentationData::play_stack : RelativeRepresentationData::discard_pile;
        // The positions of the copies of one card are always kept sorted
        std::sort(positions.begin(), positions.end());
      }
    }
  }

  // Nothing to draw
  if (_draw_pile.empty())
  {
    return 1;
  }

  --_weighted_draw_pile_size;

  const CardMultiplicity top = _draw_pile.front();
  _draw_pile.pop_front();
  ASSERT(top.multiplicity > 0);

  // Put the remaining copies (if any) back into the draw pile
  if (top.multiplicity > 1)
  {
    CardMultiplicity remaining = top;
    remaining.multiplicity--;
    if (cycle)
    {
      _draw_pile.push_back(remaining);
    }
    else
    {
      _draw_pile.push_front(remaining);
    }
  }

  // update card position of the drawn card
  if (_relative_representation.initialized && !top.card.initial_trash)
  {
    ASSERT(top.card.in_starting_hand == false);
    auto & positions = _relative_representation.card_positions_draw[top.card.local_index];
    const auto slot = std::find(positions.begin(), positions.end(), RelativeRepresentationData::draw_pile);
    ASSERT(slot != positions.end());
    *slot = _turn;
    std::sort(positions.begin(), positions.end());
  }

  _hands[_turn][index] = top.card;

  if (_draw_pile.empty())
  {
    // Note the +1, since we will immediately decrement this when moving to the next player
    _endgame_turns_left = num_players + 1;
  }
  return top.multiplicity;
}
/**
 * Undoes a draw(): the card currently at the given hand index goes back into the draw pile
 * (if a card was drawn at all) and the previously discarded or played card is put back into the hand.
 * The position bookkeeping of the relative representation is restored accordingly.
 *
 * @param index          Hand index the draw referred to
 * @param discarded_card Card that left the hand when drawing
 * @param cycle          If true, the drawn card is re-inserted at the back of the draw pile;
 *                       otherwise the draw pile is searched for an existing entry of the card
 *                       and the card is inserted at the front if there is none
 * @param played         Whether discarded_card was played (true) or discarded (false)
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_draw(
  std::uint8_t index
  , Card discarded_card
  , bool cycle
  , bool played
)
{
  // Put the card that is currently in hand back into the draw pile (this does not happen in the last round!)
  const bool card_was_drawn = _endgame_turns_left == num_players + 1 || _endgame_turns_left == no_endgame;
  if (card_was_drawn)
  {
    ASSERT(index < _hands[_turn].size());
    const Card & drawn = _hands[_turn][index];

    if (cycle)
    {
      // put the drawn card back into the draw pile (at the back), merging with the last entry if possible
      const bool merge_with_back = !_draw_pile.empty()
                                   and _draw_pile.back().card.suit == drawn.suit
                                   and _draw_pile.back().card.rank == drawn.rank;
      if (merge_with_back)
      {
        _draw_pile.back().multiplicity++;
      }
      else
      {
        _draw_pile.push_back({drawn, 1});
      }
    }
    else
    {
      // We don't know where the card came from (between the card having been removed from the draw pile
      // and re-adding it now, the user may have arbitrarily permuted the draw pile implicitly)
      // so we have to check if it is already contained in the draw pile somewhere
      const auto existing = std::find_if(
          _draw_pile.begin(), _draw_pile.end(), [&drawn](CardMultiplicity const & entry) {
            return entry.card == drawn;
          }
      );
      if (existing == _draw_pile.end())
      {
        _draw_pile.push_front({drawn, 1});
      }
      else
      {
        existing->multiplicity++;
      }
    }

    // The drawn card is in the draw pile again
    if (_relative_representation.initialized && !drawn.initial_trash)
    {
      ASSERT(drawn.in_starting_hand == false);
      auto & positions = _relative_representation.card_positions_draw[drawn.local_index];
      const auto slot = std::find(positions.begin(), positions.end(), _turn);
      ASSERT(slot != positions.end());
      *slot = RelativeRepresentationData::draw_pile;
      std::sort(positions.begin(), positions.end());
    }

    _weighted_draw_pile_size++;
    _endgame_turns_left = no_endgame;
  }
  else
  {
    // Nothing was drawn, so the hand still has to contain the card
    ASSERT(_hands[_turn][index] == discarded_card);
  }

  // The discarded or played card is in the hand of the current player again
  if (_relative_representation.initialized && !discarded_card.initial_trash)
  {
    if (discarded_card.in_starting_hand)
    {
      ASSERT(_relative_representation.card_positions_hands[discarded_card.local_index] !=
             RelativeRepresentationData::hand);
      _relative_representation.card_positions_hands[discarded_card.local_index] = RelativeRepresentationData::hand;
    }
    else
    {
      player_t const old_position = played ? RelativeRepresentationData::play_stack
                                           : RelativeRepresentationData::discard_pile;
      auto & positions = _relative_representation.card_positions_draw[discarded_card.local_index];
      const auto slot = std::find(positions.begin(), positions.end(), old_position);
      ASSERT(slot != positions.end());
      *slot = _turn;
      std::sort(positions.begin(), positions.end());
    }
  }

  _hands[_turn][index] = discarded_card;
}
2023-08-07 10:36:03 +02:00
2023-11-16 16:20:04 +01:00
/**
 * Initializes the relative representation that is used to track card positions while backtracking.
 *
 * - The draw pile is rebuilt, ordered by suit and rank, with one entry per distinct card;
 *   all trash cards are merged into one single representative trash entry.
 * - Every non-trash card of the draw pile and of the starting hands gets a local index,
 *   trash cards are flagged as initial trash.
 * - If a hand contains the same useful card more than once, all but the first copy are replaced
 *   by the representative trash card (provided such a card exists).
 *
 * Note that this function does not have to be particularly performant, it is only called once.
 * Must not be called if the relative representation has already been initialized.
 *
 * @throws std::runtime_error if the draw pile contains more than 20 cards
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::init_backtracking_information()
{
  if (_weighted_draw_pile_size > 20)
  {
    std::stringstream ss;
    ss << "Detected draw pile size of " << +_weighted_draw_pile_size << " is too big, can at most be 20.";
    throw std::runtime_error(ss.str());
  }
  ASSERT(not _relative_representation.initialized);

  // Representative for all trash cards: the first card of some suit whose stack has already been started
  const Card trash = [this]() -> Card {
    for (suit_t suit = 0; suit < num_suits; suit++)
    {
      if (_stacks[suit] < starting_card_rank)
      {
        return {suit, starting_card_rank - 1, 0, false, true};
      }
    }
    return {0, 0};
  }();
  // The fallback returned above (no stack started yet) is not actually trash and must not be
  // used to replace any card
  const bool trash_available = is_trash(trash);

  // Count the cards of the draw pile, merging all trash into the representative
  CardArray<num_suits, std::uint8_t> nums_in_draw_pile;
  for (const auto & [card, multiplicity]: _draw_pile)
  {
    if (_stacks[card.suit] > card.rank)
    {
      nums_in_draw_pile[card] += multiplicity;
    }
    else
    {
      nums_in_draw_pile[trash] += multiplicity;
    }
  }

  // Prepare draw pile
  _draw_pile.clear();
  for (suit_t suit = 0; suit < num_suits; suit++)
  {
    for (rank_t rank = 0; rank < starting_card_rank; rank++)
    {
      // Construct the card first and set the trash flag afterwards, so that is_trash()
      // never sees a card that is still being initialized
      Card card{suit, rank, static_cast<uint8_t>(_relative_representation.card_positions_draw.size()), false
                , false
      };
      card.initial_trash = is_trash(card);
      if (nums_in_draw_pile[card] > 0)
      {
        _draw_pile.push_back({card, nums_in_draw_pile[card]});
        if (!is_trash(card))
        {
          _relative_representation.card_positions_draw.push_back({});
          _relative_representation.card_positions_draw
                                  .back()
                                  .resize(nums_in_draw_pile[card], RelativeRepresentationData::draw_pile);
          _relative_representation.good_cards_draw.push_back(card);
        }
      }
    }
  }
  _relative_representation.initial_draw_pile_size = _weighted_draw_pile_size;

  size_t num_useful_cards_in_starting_hands = 0;

  // Prepare cards in hands
  for (player_t player = 0; player < num_players; player++)
  {
    // Needed to check for dupes in same hand. This has to live for the whole hand (not just
    // for a single card), otherwise it would always be empty when checked.
    boost::container::static_vector<Card, hand_size> good_cards_in_hand;
    for (Card & card: _hands[player])
    {
      card.initial_trash = is_trash(card);
      card.in_starting_hand = true;
      if (is_trash(card))
      {
        continue;
      }
      const bool already_in_hand = std::count(good_cards_in_hand.begin(), good_cards_in_hand.end(), card) > 0;
      if (already_in_hand and trash_available)
      {
        // This card is already in hand, so just replace the second copy by some trash
        card = trash;
      }
      else
      {
        card.local_index = num_useful_cards_in_starting_hands;
        num_useful_cards_in_starting_hands++;
        good_cards_in_hand.push_back(card);
        _relative_representation.good_cards_hands.push_back(card);
      }
    }
  }
  _relative_representation.card_positions_hands.clear();
  _relative_representation.card_positions_hands
                          .resize(num_useful_cards_in_starting_hands, RelativeRepresentationData::hand);
  _relative_representation.initialized = true;
}
/**
 * Undoes the last logged action, which has to be a play.
 *
 * Restores turn, clue count, hand and draw pile. For a successful play, stack and score are
 * restored; for a misplay, the lost copy of the card and the strike are given back.
 *
 * @param cycle Forwarded to revert_draw()
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void
HanabiState<num_suits, num_players, hand_size>::revert_play(bool cycle)
{
  CHECK_DRAW_PILE_INTEGRITY;

  const BacktrackAction last_action = _actions_log.top();
  _actions_log.pop();
  ASSERT(last_action.action_type == ActionType::play);
  ASSERT(!last_action.was_on_8_clues or _num_clues == 8);

  decr_turn();

  // Clues were only gained if the play finished a stack while not being on 8 clues
  if (last_action.discarded.rank == 0 and not last_action.was_on_8_clues and not last_action.strike)
  {
    _num_clues -= _clues_gained_on_discard_or_stack_finished;
  }

  revert_draw(last_action.index, last_action.discarded, cycle, !last_action.strike);

  if (not last_action.strike)
  {
    _stacks[last_action.discarded.suit]++;
    _score--;
  }
  else
  {
    // Check before decrementing: checking for a non-negative value afterwards can never
    // fail if the strike counter is unsigned
    ASSERT(_num_strikes > 0);
    // If we misplayed, then we lost the card and have to regain it now
    _num_copies_left[last_action.discarded]++;
    _num_strikes--;
  }

  CHECK_DRAW_PILE_INTEGRITY;
}
2023-11-16 16:20:04 +01:00
/**
 * Undoes the last logged action, which has to be a discard.
 * Restores turn, clue count, pace, the number of remaining copies, hand and draw pile.
 *
 * @param cycle Forwarded to revert_draw()
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_discard(bool cycle)
{
  CHECK_DRAW_PILE_INTEGRITY;

  const BacktrackAction undone = _actions_log.top();
  _actions_log.pop();
  ASSERT(undone.action_type == ActionType::discard);

  decr_turn();

  // Take back the clues gained by the discard
  _num_clues -= _clues_gained_on_discard_or_stack_finished;
  ASSERT(_num_clues >= clue_t(0));

  ++_pace;
  ++_num_copies_left[undone.discarded];
  revert_draw(undone.index, undone.discarded, cycle, false);

  CHECK_DRAW_PILE_INTEGRITY;
}
2023-11-16 16:20:04 +01:00
/**
 * Undoes the last logged action, which has to be a clue: moves the turn back and returns the spent clue.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_clue()
{
  const BacktrackAction undone = _actions_log.top();
  _actions_log.pop();
  ASSERT(undone.action_type == ActionType::clue);

  decr_turn();

  ASSERT(_num_clues < max_num_clues);
  ++_num_clues;
}
2023-08-05 11:55:46 +02:00
2023-11-16 16:20:04 +01:00
/**
 * Undoes the last logged action by dispatching to the revert function matching its type.
 * Action types other than clue, discard and play are ignored.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert()
{
  const ActionType last_type = _actions_log.top().action_type;
  if (last_type == ActionType::clue)
  {
    revert_clue();
  }
  else if (last_type == ActionType::discard)
  {
    revert_discard();
  }
  else if (last_type == ActionType::play)
  {
    revert_play();
  }
}
2023-11-10 23:53:28 +01:00
2023-11-16 16:20:04 +01:00
/**
 * Adds the given amount to the clue count and clamps the result to the range from 0 to 8.
 *
 * @param change Amount of clues to add
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::modify_clues(Hanabi::clue_t change)
{
  _num_clues += change;
  if (_num_clues > 8)
  {
    _num_clues = 8;
  }
  else if (_num_clues < 0)
  {
    _num_clues = 0;
  }
}
/**
 * Sets the clue count to the given value, which has to lie in the range from 0 to 8.
 *
 * @param clues New clue count
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::set_clues(Hanabi::clue_t clues)
{
  // Range check
  ASSERT(clue_t(0) <= clues);
  ASSERT(clues <= clue_t(8));

  this->_num_clues = clues;
}
/**
 * @return Stored index of the player whose turn it is
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
player_t HanabiState<num_suits, num_players, hand_size>::turn() const
{
  return this->_turn;
}
/**
 * @return Current clue count
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
clue_t HanabiState<num_suits, num_players, hand_size>::num_clues() const
{
  return this->_num_clues;
}
2024-01-16 17:19:42 +01:00
/**
 * @return Current number of strikes
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned HanabiState<num_suits, num_players, hand_size>::num_strikes() const
{
  return this->_num_strikes;
}
2023-11-16 16:20:04 +01:00
/**
 * @return Current score
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned HanabiState<num_suits, num_players, hand_size>::score() const
{
  return this->_score;
}
/**
 * @return Copies of the hands of all players, one vector of cards per player, in player order
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<std::vector<Card>> HanabiState<num_suits, num_players, hand_size>::hands() const
{
  std::vector<std::vector<Card>> all_hands;
  all_hands.reserve(num_players);
  for (player_t player = 0; player < num_players; player++)
  {
    const auto & hand = _hands[player];
    all_hands.emplace_back(hand.begin(), hand.end());
  }
  return all_hands;
}
/**
 * @return Copy of the hand of the player whose turn it is
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<Card> HanabiState<num_suits, num_players, hand_size>::cur_hand() const
{
  const auto & hand = _hands[_turn];
  return std::vector<Card>(hand.begin(), hand.end());
}
/**
 * Lists, for playing or discarding the card at the given hand index, every potential draw
 * together with the result of lookup() for the state reached by it.
 *
 * @param index Hand index of the card to play or discard
 * @param play  Forwarded to do_for_each_potential_draw()
 * @return One pair per potential draw: the drawn card with the multiplicity passed to the callback,
 *         and the looked-up value (empty if the lookup had no result)
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
HanabiState<num_suits, num_players, hand_size>::possible_next_states(hand_index_t index, bool play)
{
  std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> next_states;

  const auto record_state = [this, &next_states, &index](unsigned multiplicity) {
    const auto looked_up = lookup();
    // bit hacky to get drawn card here: it is in the hand of the previous player, so step back temporarily
    decr_turn();
    const CardMultiplicity drawn_card = {_hands[_turn][index], multiplicity};
    incr_turn();
    next_states.emplace_back(drawn_card, looked_up);
  };
  do_for_each_potential_draw(index, play, record_state);

  return next_states;
}
/**
 * Lists the actions considered in the current state together with their winning probability.
 *
 * No actions are listed if the score goal is reached, the pace is negative, no endgame turns are
 * left or the maximum number of strikes is exceeded. Otherwise, plays are listed first, then
 * discards (only if the pace is positive and the clue count is below max_num_clues), then a clue
 * (only if at least one clue is available). All trash cards are normalized to Cards::trash, and
 * at most one trash play and one trash discard are listed.
 *
 * @param evaluate_all If true, resulting states are evaluated via evaluate_state();
 *                     otherwise they are only looked up via lookup()
 * @param reasonable   If true, misplays are only considered at max_num_clues, and non-trash
 *                     discards are only considered if there is no trash in hand
 * @return Pairs of action and probability; the probability is empty if it is not known for
 *         at least one of the potential draws
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<std::pair<Action, std::optional<probability_t>>>
HanabiState<num_suits, num_players, hand_size>::get_reasonable_actions(bool evaluate_all, bool reasonable)
{
  std::vector<std::pair<Action, std::optional<probability_t>>> actions{};

  if (_score == _score_goal or _pace < 0 or _endgame_turns_left == 0 or _num_strikes > max_num_strikes)
  {
    return actions;
  }

  // Probability of the state we are currently in: either fully evaluated or just looked up
  const auto probability_of_current_state = [this, evaluate_all]() -> std::optional<probability_t> {
    if (evaluate_all)
    {
      return evaluate_state();
    }
    return lookup();
  };

  // Probability of playing / discarding the card at the given index, averaged over all potential
  // draws weighted by their multiplicity. Empty if the probability of some resulting state is unknown.
  const auto probability_after_draw = [this, &probability_of_current_state](
      std::uint8_t index, bool play
  ) -> std::optional<probability_t> {
    bool known = true;
    probability_t sum_of_probabilities = 0;
    do_for_each_potential_draw(index, play, [&probability_of_current_state, &sum_of_probabilities
                                             , &known](const unsigned long multiplicity) {
      const std::optional<probability_t> prob = probability_of_current_state();
      if (prob.has_value())
      {
        sum_of_probabilities += prob.value() * multiplicity;
      }
      else
      {
        known = false;
      }
    });
    if (not known)
    {
      return std::nullopt;
    }
    // With an empty draw pile there is exactly one outcome, so avoid dividing by zero
    const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
    const probability_t probability = sum_of_probabilities / total_weight;
    return probability;
  };

  const std::array<Card, hand_size> & hand = _hands[_turn];

  // First, check for playable cards
  bool played_trash = false;
  for (std::uint8_t index = 0; index < hand_size; index++)
  {
    Card card = hand[index];
    bool const consider_playing = is_playable(card) or (_num_strikes < max_num_strikes and not is_critical(card) and (not reasonable or _num_clues == max_num_clues) and (not is_trash(card) or not played_trash));
    if (not consider_playing)
    {
      continue;
    }
    if (is_trash(card))
    {
      card = Cards::trash;
      played_trash = true;
    }
    const Action action = {ActionType::play, card};
    const std::optional<probability_t> probability_play = probability_after_draw(index, true);
    actions.emplace_back(action, probability_play);
  }

  // Check for discards
  if (_pace > 0 and _num_clues < max_num_clues)
  {
    auto trash_it = std::find_if(hand.begin(), hand.end(), [this](Card const & card) { return is_trash(card); });
    bool const trash_in_hand = trash_it != hand.end();
    bool discarded_trash = false;
    std::vector<Card> discarded;
    for (std::uint8_t index = 0; index < hand_size; index++)
    {
      Card card = hand[index];
      // We only consider discarding if
      // - the card is trash, and we have not listed a trash discard yet
      // - the card is not critical, and we have not listed the same card yet
      bool const consider_discarding = (is_trash(card) and not discarded_trash)
                                       or ((not trash_in_hand or not reasonable) and (not is_trash(card) and not is_critical(card) and std::find(discarded.begin(), discarded.end(), card) == discarded.end()));
      if (not consider_discarding)
      {
        continue;
      }
      if (is_trash(card))
      {
        // This is useful for normalizing what we discard and therefore also for later printing routines.
        // Also note that this card is automatically identified as trash by the is_trash() method,
        // so properly handled by other parts of the program if input again.
        card = Cards::trash;
        discarded_trash = true;
      }
      else
      {
        discarded.push_back(card);
      }
      const Action action = {ActionType::discard, card};
      const std::optional<probability_t> probability_discard = probability_after_draw(index, false);
      actions.emplace_back(action, probability_discard);
    }
  }

  // Check for clues
  if (_num_clues >= clue_t(1))
  {
    give_clue();
    const std::optional<probability_t> prob = probability_of_current_state();
    const Action action = {ActionType::clue, Cards::unknown};
    actions.emplace_back(action, prob);
    revert_clue();
  }
  return actions;
}
2023-08-06 11:54:57 +02:00
2023-11-16 16:20:04 +01:00
/**
 * Looks up the current state via internal_lookup() and converts the result into a probability
 * by dividing it by the factorial of the draw pile size.
 *
 * @return The probability, or an empty optional if internal_lookup() has no result
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::optional<probability_t> HanabiState<num_suits, num_players, hand_size>::lookup() const
{
  const std::optional<uint64_t> looked_up = internal_lookup();
  if (not looked_up.has_value())
  {
    return std::nullopt;
  }
  return probability_t(looked_up.value()) / Factorial::factorial(_weighted_draw_pile_size);
}
/**
 * Looks up the scaled value of the current state without evaluating anything new.
 *
 * The terminal checks mirror the ones in internal_evaluate_state(): that function returns for
 * terminal states before it ever writes to the tablebase, so they have to be recognised here
 * as well, otherwise they would be reported as unknown.
 *
 * @return Factorial::factorial(_weighted_draw_pile_size) if the score goal is reached,
 *         0 if the state is lost (negative pace, no endgame turns left or too many strikes),
 *         the stored tablebase value if the state has an entry, and std::nullopt otherwise.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::optional<std::uint64_t> HanabiState<num_suits, num_players, hand_size>::internal_lookup() const
{
  // Won: use the same goal as internal_evaluate_state() instead of a hard-coded maximum score.
  if (_score == _score_goal)
  {
    return Factorial::factorial(_weighted_draw_pile_size);
  }
  // Lost: same conditions as in internal_evaluate_state(), including the strike limit.
  if (_pace < 0 or _endgame_turns_left == 0 or _num_strikes > max_num_strikes)
  {
    return 0;
  }
  // Single lookup instead of count() followed by at().
  const auto entry = _position_tablebase.find(unique_id());
  if (entry == _position_tablebase.end())
  {
    return std::nullopt;
  }
  return entry->second;
}
/**
 * Moves a draw pile entry matching the given card to the front of the draw pile.
 *
 * An entry matches if it has the same suit and rank as the card, or if both the entry's card
 * and the given card are trash. The first matching entry is swapped with the front entry.
 *
 * @param card The card whose draw pile entry should become the front entry.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::rotate_next_draw(const Card & card)
{
  const auto matches_requested_card = [this, &card](const CardMultiplicity & entry) {
    const bool same_suit_and_rank = entry.card.suit == card.suit and entry.card.rank == card.rank;
    return same_suit_and_rank or (is_trash(card) and is_trash(entry.card));
  };
  auto card_it = std::find_if(_draw_pile.begin(), _draw_pile.end(), matches_requested_card);
  // The caller is expected to only ask for cards that are still in the draw pile.
  ASSERT(card_it != _draw_pile.end());
  std::swap(*card_it, _draw_pile.front());
}
/**
 * @return The action type of the entry on top of the actions log.
 *         The log must not be empty (checked by assertion).
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
ActionType HanabiState<num_suits, num_players, hand_size>::last_action_type() const
{
  ASSERT(not _actions_log.empty());
  const auto & most_recent_entry = _actions_log.top();
  return most_recent_entry.action_type;
}
/**
 * Decides whether the current state should be written to the tablebase.
 *
 * @return true if memory saving is off; otherwise true only for states whose weighted draw pile
 *         size is even.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::save_state_to_map()
{
  const bool draw_pile_size_is_even = _weighted_draw_pile_size % 2 == 0;
  return not _save_memory or draw_pile_size_is_even;
}
2023-11-16 16:20:04 +01:00
/**
 * Evaluates the current state and converts the result into a probability.
 *
 * @return The value of internal_evaluate_state() divided by the factorial of the weighted
 *         draw pile size.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
probability_t HanabiState<num_suits, num_players, hand_size>::evaluate_state()
{
  // Evaluate first; the scaling factor is read only afterwards, exactly as before.
  const std::uint64_t scaled_wins = internal_evaluate_state();
  const auto scaling_factor = Factorial::factorial(_weighted_draw_pile_size);
  return probability_t(scaled_wins) / scaling_factor;
}
/**
 * Recursively evaluates the current state by trying the actions considered below and keeping the best.
 *
 * The result is scaled: Factorial::factorial(_weighted_draw_pile_size) stands for a certain win
 * and 0 for a certain loss (evaluate_state() divides by that factorial to obtain a probability).
 *
 * Actions are tried in this order: plays, discards (at most one trash/duplicate discard, or else
 * every non-critical card), and finally giving a clue. As soon as one action yields a certain win
 * the search stops. Non-terminal results are passed to update_tablebase().
 *
 * @return The best scaled value over all considered actions.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::internal_evaluate_state()
{
  ASSERT(_relative_representation.initialized);
  _enumerated_states++;

  // Terminal states first: they need neither an id nor a tablebase entry.
  if (_score == _score_goal)
  {
    return Factorial::factorial(_weighted_draw_pile_size);
  }
  if (_pace < 0 || _endgame_turns_left == 0 || _num_strikes > max_num_strikes)
  {
    return 0;
  }

  // Only now compute the id, so that terminal states do not pay for the encoding.
  const unsigned long id_of_state = unique_id();
#ifndef GAME_STATE_NO_TABLEBASE_LOOKUP
  {
    const auto lookup_it = _position_tablebase.find(id_of_state);
    if (lookup_it != _position_tablebase.end())
    {
      return lookup_it->second;
    }
  }
#endif

  // TODO: Have some endgame analysis here?

  // Every explored action is reverted before we compare against this value, so the draw pile
  // size (and therefore this bound) is the same each time it is used below.
  const std::uint64_t certain_win = Factorial::factorial(_weighted_draw_pile_size);
  std::uint64_t best_probability = 0;

  // Folds one candidate into best_probability and reports whether a certain win has been reached,
  // in which case no further action needs to be examined.
  const auto reaches_certain_win = [&best_probability, certain_win](const std::uint64_t candidate) {
    best_probability = std::max(best_probability, candidate);
    return best_probability == certain_win;
  };

  const std::array<Card, hand_size> & hand = _hands[_turn];

  // First, check for playables. At maximum clues (and with strikes to spare) we additionally play
  // non-critical cards, but trash only once.
  bool played_trash = false;
  for (hand_index_t index = 0; index < hand_size; index++)
  {
    // Copy the card: the hand is modified (and restored) while the play is explored.
    const Card card = hand[index];
    if (is_playable(card)
        or (_num_clues == max_num_clues
            and _num_strikes < max_num_strikes
            and not is_critical(card)
            and (not is_trash(card) or not played_trash)))
    {
      if (is_trash(card))
      {
        played_trash = true;
      }
      if (reaches_certain_win(check_play_or_discard(index, true)))
      {
        update_tablebase(id_of_state, best_probability);
        return best_probability;
      }
    }
  }

  // Check for discards now
  if (_pace > 0 and (_num_clues < max_num_clues or _num_strikes < max_num_strikes))
  {
    // At maximum clues the card is played instead of discarded.
    bool const play_card_instead_of_discarding = _num_clues == max_num_clues;

    // This will hold the index of trash to discard, if any
    std::optional<hand_index_t> discard_index;
    for (hand_index_t index = 0; index < hand_size; index++)
    {
      if (is_trash(hand[index]))
      {
        // All discards are equivalent, do not continue searching for different trash
        discard_index = index;
        break;
      }
    }

    // If no trivial trash found, check for duplicates next
    if (not discard_index.has_value())
    {
      for (hand_index_t index = 0; index < hand_size; index++)
      {
        if (std::find(hand.begin() + index + 1, hand.end(), hand[index]) != hand.end())
        {
          // Found a duplicate to discard. Since we are discarding essentially trash,
          // we do not have to consider further actions
          discard_index = index;
          break;
        }
      }
    }

    if (discard_index.has_value())
    {
      // Discard if we found trash now
      if (reaches_certain_win(check_play_or_discard(discard_index.value(), play_card_instead_of_discarding)))
      {
        update_tablebase(id_of_state, best_probability);
        return best_probability;
      }
    }
    else
    {
      // If we reach this state, then there are no dupes in hand, so we need to check if we want to
      // sacrifice cards in hand
      for (hand_index_t index = 0; index < hand_size; ++index)
      {
        if (is_critical(hand[index]))
        {
          continue;
        }
        if (reaches_certain_win(check_play_or_discard(index, play_card_instead_of_discarding)))
        {
          update_tablebase(id_of_state, best_probability);
          return best_probability;
        }
      }
    }
  }

  // Last option is to stall
  if (_num_clues >= clue_t(1))
  {
    give_clue();
    const std::uint64_t probability_stall = internal_evaluate_state();
    revert_clue();
    best_probability = std::max(best_probability, probability_stall);
  }

  update_tablebase(id_of_state, best_probability);
  return best_probability;
}
2024-01-12 18:05:15 +01:00
/**
 * Evaluates playing or discarding the card at the given hand index over all potential draws.
 *
 * @param index Hand index of the card to play or discard.
 * @param play  true to play the card, false to discard it.
 * @return The sum, over all potential draws, of the evaluated state value times the
 *         multiplicity reported for that draw.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::check_play_or_discard(hand_index_t index, bool play)
{
  std::uint64_t weighted_total = 0;
  const auto accumulate_outcome = [this, &weighted_total](const unsigned long multiplicity) {
    weighted_total += internal_evaluate_state() * multiplicity;
  };
  do_for_each_potential_draw(index, play, accumulate_outcome);
  return weighted_total;
}
2023-11-16 16:20:04 +01:00
/**
 * Performs a play or discard of the card at the given index, calls f, and reverts the action.
 *
 * With an empty draw pile the action is performed exactly once and f is called with 1.
 * Otherwise the action is performed once per draw pile entry; each time, the value returned by
 * the action is passed to f as the multiplicity. The multiplicities are asserted to add up to
 * the weighted draw pile size, and the draw pile is asserted to be unchanged on exit.
 *
 * @param index Hand index of the card to play or discard.
 * @param play  true to play the card, false to discard it.
 * @param f     Callable taking the multiplicity; invoked while the action is applied.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
template<class Function>
void
HanabiState<num_suits, num_players, hand_size>::do_for_each_potential_draw(hand_index_t index, bool play, Function f)
{
  // Snapshot that is only compared against in the final assertion.
  auto copy = _draw_pile;

  const auto apply_action = [this, index, play]() {
    return play ? play_and_potentially_update(index, true, false)
                : discard_and_potentially_update(index, true);
  };

  const auto undo_action = [this, play]() {
    if (not play)
    {
      revert_discard(true);
      return;
    }
    revert_play(true);
  };

  if (not _draw_pile.empty())
  {
    unsigned sum_of_multiplicities = 0;
    for (size_t draw = 0; draw < _draw_pile.size(); ++draw)
    {
      const unsigned long multiplicity = apply_action();
      sum_of_multiplicities += multiplicity;
      f(multiplicity);
      undo_action();
    }
    ASSERT(sum_of_multiplicities == _weighted_draw_pile_size);
  }
  else
  {
    // Nothing to draw: there is exactly one outcome.
    apply_action();
    f(1);
    undo_action();
  }
  ASSERT(_draw_pile == copy);
}
/**
 * Encodes the current state into a single integer used as tablebase key.
 *
 * The id is built like a mixed-radix number: for each component the id is multiplied by that
 * component's radix and the component's value is added. The components are, in order:
 * strikes, positions of the tracked cards that started in the draw pile, positions of the tracked
 * cards that started in hands, number of clues (in half-clue steps), a combined draw pile size /
 * remaining endgame turns value, and the player whose turn it is.
 *
 * @return The encoded id.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::unique_id() const
{
  using position_value_t = std::underlying_type_t<typename RelativeRepresentationData::CardPosition>;

  // Accumulate in the declared return type. `unsigned long` is only 32 bits wide on LLP64
  // platforms, which would silently truncate the id there.
  // Encode strikes first, since they will often be zero.
  std::uint64_t id = _num_strikes;

  // encode all positions of cards that started in draw pile
  ASSERT(_relative_representation.card_positions_draw.size() == _relative_representation.good_cards_draw.size());
  for (size_t i = 0; i < _relative_representation.card_positions_draw.size(); i++)
  {
    for (player_t player: _relative_representation.card_positions_draw[i])
    {
      id *= num_players + 3;
      // We normalize here: If a card is already played, then the positions of its other copies
      // do not matter, so we can just pretend that they are all in the trash already.
      // The resulting states will be equivalent.
      if (!is_trash(_relative_representation.good_cards_draw[i]))
      {
        id += player;
      }
      else
      {
        id += RelativeRepresentationData::discard_pile;
      }
    }
  }

  // encode positions of cards that started in hands
  ASSERT(_relative_representation.card_positions_hands.size() == _relative_representation.good_cards_hands.size());
  for (size_t i = 0; i < _relative_representation.card_positions_hands.size(); i++)
  {
    id *= 3;
    // We have to normalize here again and pretend that cards already played are all discarded.
    // Note that implicitly, this means that when we lose the last copy of a good card, this encoding
    // pretends that the card has been played already.
    // However, since we only ever consider actions that do not lose the last copy of a card, this is
    // not a problem (unless our base state was already lacking cards, in which case the card is never
    // considered played in any state).
    if (is_trash(_relative_representation.good_cards_hands[i]))
    {
      id += static_cast<position_value_t>(RelativeRepresentationData::CardPosition::discarded);
    }
    else
    {
      id += static_cast<position_value_t>(_relative_representation.card_positions_hands[i]);
    }
  }

  // encode number of clues, scaled by two so that the value is an integer
  clue_t const scaled_clues = clue_t(2) * _num_clues;
  assert(scaled_clues.denominator() == 1);
  id *= (max_num_clues * clue_t(2)).numerator() + 1;
  id += scaled_clues.numerator();

  // we can encode draw pile size and extra turn in one metric, since we only have extra turns if draw pile is empty
  const std::uint8_t draw_pile_size_and_extra_turns = [this]() -> std::uint8_t {
    if (_endgame_turns_left == no_endgame)
    {
      return _weighted_draw_pile_size + num_players;
    }
    else
    {
      return _endgame_turns_left;
    }
  }();

  // NOTE(review): if the weighted draw pile size can equal initial_draw_pile_size, the value added
  // below equals this radix. The id it then collides with has a value of 0 here, which
  // internal_evaluate_state() treats as lost and never stores - confirm that this is intended.
  id *= _relative_representation.initial_draw_pile_size + num_players;
  id += draw_pile_size_and_extra_turns;

  id *= num_players;
  id += _turn;

  // The id is unique now, since for all relevant cards, we know their position (including if they are played),
  // the number of clues, the draw pile size and whose turn it is.
  // This already uniquely determines the current players position, assuming that we never discard good cards
  // (and only play them)

  return id;
}
2023-08-12 18:48:01 +02:00
2023-11-16 16:20:04 +01:00
/**
 * Returns the individual components that unique_id() folds into one number, for inspection.
 *
 * @return A pair: first, the component values in encoding order (strikes, draw pile card
 *         positions, hand card positions, scaled clues, combined draw pile size / endgame turns,
 *         turn); second, the card belonging to each draw pile card position entry.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::pair<std::vector<std::uint64_t>, std::vector<Card>>
HanabiState<num_suits, num_players, hand_size>::dump_unique_id_parts() const
{
  using position_value_t = std::underlying_type_t<typename RelativeRepresentationData::CardPosition>;

  std::vector<std::uint64_t> ret;
  std::vector<Card> cards;

  // strikes come first
  ret.push_back(_num_strikes);

  // positions of all cards that started in the draw pile
  ASSERT(_relative_representation.card_positions_draw.size() == _relative_representation.good_cards_draw.size());
  for (size_t card_idx = 0; card_idx < _relative_representation.card_positions_draw.size(); card_idx++)
  {
    const auto & tracked_card = _relative_representation.good_cards_draw[card_idx];
    for (player_t holder: _relative_representation.card_positions_draw[card_idx])
    {
      // Normalization: once a card counts as trash, where its copies are does not matter,
      // so all of them are reported as being in the discard pile.
      if (is_trash(tracked_card))
      {
        ret.push_back(RelativeRepresentationData::discard_pile);
      }
      else
      {
        ret.push_back(holder);
      }
      cards.push_back(tracked_card);
    }
  }

  // positions of cards that started in hands
  ASSERT(_relative_representation.card_positions_hands.size() == _relative_representation.good_cards_hands.size());
  for (size_t card_idx = 0; card_idx < _relative_representation.card_positions_hands.size(); card_idx++)
  {
    // Same normalization as above: cards that count as trash are reported as discarded.
    // Implicitly, losing the last copy of a good card makes this encoding pretend the card was played.
    // Since we only ever consider actions that do not lose the last copy of a card, this is not a problem
    // (unless our base state was already lacking cards, in which case the card is never considered
    // played in any state).
    if (not is_trash(_relative_representation.good_cards_hands[card_idx]))
    {
      ret.push_back(static_cast<position_value_t>(_relative_representation.card_positions_hands[card_idx]));
    }
    else
    {
      ret.push_back(static_cast<position_value_t>(RelativeRepresentationData::CardPosition::discarded));
    }
  }

  // number of clues, scaled by two
  clue_t const scaled_clues = clue_t(2) * _num_clues;
  assert(scaled_clues.denominator() == 1);
  ret.push_back(scaled_clues.numerator());

  // draw pile size and extra turns share one value, since extra turns only exist with an empty draw pile
  const auto combined_draw_pile_and_endgame = [this]() -> uint8_t {
    if (_endgame_turns_left != no_endgame)
    {
      return _endgame_turns_left;
    }
    return _weighted_draw_pile_size + num_players;
  };
  const std::uint8_t draw_pile_size_and_extra_turns = combined_draw_pile_and_endgame();
  ret.push_back(draw_pile_size_and_extra_turns);

  ret.push_back(_turn);

  // The parts are now complete: for all relevant cards we know their position (including if they are
  // played), the number of clues, the draw pile size and whose turn it is.
  // This already uniquely determines the current players position, assuming that we never discard
  // good cards (and only play them)
  return {ret, cards};
}
/**
 * @return Read-only access to the tablebase that maps state ids to their stored values.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
const map_type<unsigned long, std::uint64_t> &
HanabiState<num_suits, num_players, hand_size>::position_tablebase() const
{
  const auto & tablebase = _position_tablebase;
  return tablebase;
}
/**
 * @return The weighted draw pile size (the value of _weighted_draw_pile_size).
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
size_t HanabiState<num_suits, num_players, hand_size>::draw_pile_size() const
{
  const size_t weighted_size = _weighted_draw_pile_size;
  return weighted_size;
}
/**
 * @return Whether the relative representation has been marked as initialized.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_relative_state_initialized() const
{
  const bool initialized = _relative_representation.initialized;
  return initialized;
}
/**
 * Records the value of a state in the tablebase, if save_state_to_map() allows it.
 *
 * In builds without NDEBUG, an already stored value is asserted to equal the new one.
 *
 * @param id          Id of the state (as produced by unique_id()).
 * @param probability Value to store for that state.
 */
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::update_tablebase(
  unsigned long id, std::uint64_t probability
)
{
  // When DUMP_STATES is defined, details of the one state with the hard-coded id below are printed
  // for analysis purposes. Change the id to inspect a different state.
#ifdef DUMP_STATES
  if (id == 87476369689)
  {
    std::cout << *this << std::endl;
    const auto [id_parts, cards] = dump_unique_id_parts();
    std::cout << "id is: " << id << ", id parts are: ";
    for (auto const & id_part: id_parts)
    {
      std::cout << id_part << " ";
    }
    std::cout << ", encoded cards are ";
    for (auto const & encoded_card: cards)
    {
      std::cout << encoded_card << " ";
    }
    std::cout << ", probability is ";
    print_probability(std::cout, probability);
    std::cout << "\n" << std::endl;
  }
#endif

#ifndef NDEBUG
  // Consistency check: a state must never be evaluated to two different values.
  const bool already_stored = _position_tablebase.count(id) == 1;
  if (already_stored)
  {
    ASSERT(_position_tablebase[id] == probability);
  }
#endif

  if (not save_state_to_map())
  {
    return;
  }
  _position_tablebase[id] = probability;
}
2023-08-05 11:55:46 +02:00
} // namespace Hanabi