Maximilian Keßler
5b0834bc22
This ensures that when doing a regular revert, we revert to exactly the same state (i.e. also identical ordering of draw pile by default) and only rotate the draw pile in case we ensure internally that we iterate over all possible draws so that the ordering is restored in the end.
975 lines
No EOL
39 KiB
C++
975 lines
No EOL
39 KiB
C++
#include <algorithm>
|
|
#include <iostream>
|
|
|
|
#include "myassert.h"
|
|
#include "game_state.h"
|
|
|
|
namespace Hanabi {
|
|
|
|
/**
 * Prints an optional probability.
 *
 * Writes "unknown" when no value is present; otherwise forwards the contained
 * value to the matching print_probability overload.
 *
 * @param os    Stream to write to.
 * @param prob  Probability that may or may not be known.
 * @return      The stream that was written to.
 */
template<typename T>
std::ostream& print_probability(std::ostream& os, const std::optional<T>& prob) {
  if (!prob.has_value()) {
    os << "unknown";
    return os;
  }
  return print_probability(os, prob.value());
}
|
|
|
|
/**
 * Prints a rational probability, followed by " ~ " and its value as a
 * percentage with a stream precision of 5.
 *
 * Note that the precision set on the stream is not restored afterwards.
 *
 * @param os    Stream to write to.
 * @param prob  Probability to print.
 * @return      The stream that was written to.
 */
std::ostream& print_probability(std::ostream& os, const rational_probability & prob) {
  const double percentage = boost::rational_cast<double>(prob) * 100;
  os << prob << " ~ " << std::setprecision(5) << percentage << "%";
  return os;
}
|
|
|
|
/**
 * Prints a floating point probability with a stream precision of 5.
 *
 * Note that the precision set on the stream is not restored afterwards.
 *
 * @param os    Stream to write to.
 * @param prob  Probability to print.
 * @return      The stream that was written to.
 */
std::ostream& print_probability(std::ostream& os, double prob) {
  return os << std::setprecision(5) << prob;
}
|
|
|
|
/**
 * Streams a game state by delegating to its print() member.
 *
 * @return The stream that was written to.
 */
std::ostream &operator<<(std::ostream &os, HanabiStateIF const &hanabi_state) {
  hanabi_state.print(os);
  return os;
}
|
|
|
|
/**
 * Converts a card to its textual form.
 *
 * The card Cards::trash is rendered as "kt"; any other card is rendered as
 * the initial of its suit followed by the number (5 - rank).
 */
std::string to_string(const Hanabi::Card &card) {
  if (card == Hanabi::Cards::trash) {
    return "kt";
  }
  return Hanabi::suit_initials[card.suit] + std::to_string(5 - card.rank);
}
|
|
|
|
std::ostream &operator<<(std::ostream &os, Action const& action) {
|
|
switch(action.type) {
|
|
case ActionType::play:
|
|
os << "play " + to_string(action.card);
|
|
break;
|
|
case ActionType::discard:
|
|
os << "discard";
|
|
break;
|
|
case ActionType::clue:
|
|
os << "clue";
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return os;
|
|
}
|
|
|
|
/**
 * Two cards compare equal when both suit and rank agree; no other member
 * takes part in the comparison.
 */
bool Card::operator==(const Card &other) const {
  return rank == other.rank and suit == other.suit;
}
|
|
|
|
/**
 * Streams the textual form of a card as produced by to_string().
 *
 * @return The stream that was written to.
 */
std::ostream &operator<<(std::ostream &os, const Card &card) {
  return os << to_string(card);
}
|
|
|
|
template<size_t num_suits>
|
|
std::ostream &operator<<(std::ostream &os, const Stacks<num_suits> &stacks) {
|
|
for (size_t i = 0; i < stacks.size(); i++) {
|
|
os << suit_initials[i] << starting_card_rank - stacks[i];
|
|
if(i < stacks.size() - 1) {
|
|
os << ", ";
|
|
}
|
|
}
|
|
return os;
|
|
}
|
|
|
|
template<suit_t num_suits, typename T>
|
|
void CardArray<num_suits, T>::fill(T val) {
|
|
for (size_t suit = 0; suit < num_suits; suit++) {
|
|
for (rank_t rank = 0; rank < starting_card_rank; rank++) {
|
|
_array[suit][rank] = val;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<suit_t num_suits, typename T>
|
|
CardArray<num_suits, T>::CardArray(T default_val) {
|
|
fill(default_val);
|
|
}
|
|
|
|
template<suit_t num_suits, typename T>
|
|
const T &CardArray<num_suits, T>::operator[](const Card &card) const {
|
|
return _array[card.suit][card.rank];
|
|
};
|
|
|
|
template<suit_t num_suits, typename T>
|
|
T &CardArray<num_suits, T>::operator[](const Card &card) {
|
|
return _array[card.suit][card.rank];
|
|
};
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
HanabiState<num_suits, num_players, hand_size>::BacktrackAction::BacktrackAction(
|
|
Hanabi::ActionType action_type, Hanabi::Card discarded_or_played, Hanabi::hand_index_t index,
|
|
bool was_on_8_clues
|
|
):
|
|
action_type(action_type),
|
|
discarded(discarded_or_played),
|
|
index(index),
|
|
was_on_8_clues(was_on_8_clues) {
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> &deck, uint8_t score_goal):
|
|
_turn(0),
|
|
_num_clues(max_num_clues),
|
|
_weighted_draw_pile_size(deck.size()),
|
|
_stacks(),
|
|
_hands(),
|
|
_draw_pile(),
|
|
_endgame_turns_left(no_endgame),
|
|
_pace(deck.size() - score_goal - num_players * (hand_size - 1)),
|
|
_score(0),
|
|
_score_goal(score_goal),
|
|
_actions_log(),
|
|
_relative_representation(),
|
|
_position_tablebase(),
|
|
_enumerated_states(0) {
|
|
std::ranges::fill(_stacks, starting_card_rank);
|
|
for (const Card &card: deck) {
|
|
_draw_pile.push_back({card, 1});
|
|
}
|
|
for (player_t player = 0; player < num_players; player++) {
|
|
for (std::uint8_t index = 0; index < hand_size; index++) {
|
|
draw(index);
|
|
}
|
|
incr_turn();
|
|
}
|
|
ASSERT(_turn == 0);
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::give_clue() {
|
|
ASSERT(_num_clues > 0);
|
|
--_num_clues;
|
|
|
|
_actions_log.emplace(ActionType::clue, Cards::unknown, 0);
|
|
incr_turn();
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::incr_turn() {
|
|
_turn = (_turn + 1) % num_players;
|
|
if (_endgame_turns_left != no_endgame) {
|
|
_endgame_turns_left--;
|
|
}
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::decr_turn() {
|
|
_turn = (_turn + num_players - 1) % num_players;
|
|
if (_endgame_turns_left != no_endgame) {
|
|
_endgame_turns_left++;
|
|
}
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::check_draw_pile_integrity() const
|
|
{
|
|
if (not _relative_representation.initialized)
|
|
{
|
|
return;
|
|
}
|
|
if (_draw_pile.size() >= 2) {
|
|
auto copy = _draw_pile;
|
|
copy.sort([](CardMultiplicity const & card1, CardMultiplicity const & card2){
|
|
return card1.card.rank < card2.card.rank or (card1.card.rank == card2.card.rank and card1.card.suit < card2.card.suit);
|
|
});
|
|
auto before = copy.begin();
|
|
for(auto it = std::next(copy.begin()); it != copy.end(); ++it) {
|
|
ASSERT(before->card != it->card);
|
|
++before;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
bool HanabiState<num_suits, num_players, hand_size>::is_playable(const Hanabi::Card &card) const {
|
|
return card.rank == _stacks[card.suit] - 1;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
std::uint64_t HanabiState<num_suits, num_players, hand_size>::enumerated_states() const {
|
|
return _enumerated_states;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
bool HanabiState<num_suits, num_players, hand_size>::is_trash(const Hanabi::Card &card) const {
|
|
return card.rank >= _stacks[card.suit];
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::play(Hanabi::hand_index_t index) {
|
|
play_and_potentially_update(index);
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
unsigned long HanabiState<num_suits, num_players, hand_size>::play_and_potentially_update(hand_index_t index) {
|
|
check_draw_pile_integrity();
|
|
ASSERT(index < _hands[_turn].size());
|
|
const Card played_card = _hands[_turn][index];
|
|
if (!is_playable(played_card)) {
|
|
const unsigned long multiplicity = draw(index);
|
|
incr_turn();
|
|
return multiplicity;
|
|
}
|
|
ASSERT(is_playable(played_card));
|
|
|
|
_actions_log.emplace(ActionType::play, played_card, index, _num_clues == 8);
|
|
|
|
--_stacks[played_card.suit];
|
|
_score++;
|
|
|
|
if (played_card.rank == 0 and _num_clues < max_num_clues) {
|
|
// update clues if we played the last played_card of a stack
|
|
_num_clues++;
|
|
}
|
|
|
|
const unsigned long multiplicity = draw(index);
|
|
|
|
incr_turn();
|
|
check_draw_pile_integrity();
|
|
return multiplicity;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::discard(hand_index_t index) {
|
|
discard_and_potentially_update(index);
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
unsigned long HanabiState<num_suits, num_players, hand_size>::discard_and_potentially_update(hand_index_t index) {
|
|
check_draw_pile_integrity();
|
|
ASSERT(index < _hands[_turn].size());
|
|
ASSERT(_num_clues != max_num_clues);
|
|
|
|
const Card discarded_card = _hands[_turn][index];
|
|
_num_clues++;
|
|
_pace--;
|
|
|
|
unsigned long multiplicity = draw(index);
|
|
_actions_log.emplace(ActionType::discard, discarded_card, index);
|
|
|
|
incr_turn();
|
|
check_draw_pile_integrity();
|
|
return multiplicity;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
std::uint8_t HanabiState<num_suits, num_players, hand_size>::find_card_in_hand(
|
|
const Hanabi::Card &card) const {
|
|
for (std::uint8_t i = 0; i < hand_size; i++) {
|
|
if (_hands[_turn][i].rank == card.rank && _hands[_turn][i].suit == card.suit) {
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::print(std::ostream &os) const {
|
|
os << "Stacks: " << _stacks << " (score " << +_score << ")";
|
|
os << ", clues: " << +_num_clues << ", turn: " << +_turn;
|
|
if (_endgame_turns_left != no_endgame) {
|
|
os << ", " << +_endgame_turns_left << " turns left";
|
|
}
|
|
os << std::endl;
|
|
os << "Draw pile: ";
|
|
unsigned num_trash = 0;
|
|
for (const auto &[card, mul]: _draw_pile) {
|
|
if (is_trash(card)) {
|
|
num_trash += mul;
|
|
continue;
|
|
}
|
|
os << card;
|
|
if (mul > 1) {
|
|
os << " (" << +mul << ")";
|
|
}
|
|
os << ", ";
|
|
}
|
|
if (num_trash > 0) {
|
|
os << Cards::trash << " (" << num_trash << ") ";
|
|
}
|
|
os << "[size " << +_weighted_draw_pile_size << "]" << std::endl;
|
|
os << "Hands: ";
|
|
for (const auto &hand: _hands) {
|
|
os << "[";
|
|
for(hand_index_t index = 0; index < hand.size(); index++) {
|
|
os << hand[index];
|
|
if (index < hand.size() - 1) {
|
|
os << " ";
|
|
}
|
|
}
|
|
os << "] ";
|
|
}
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
unsigned HanabiState<num_suits, num_players, hand_size>::draw(uint8_t index) {
|
|
ASSERT(index < _hands[_turn].size());
|
|
|
|
// update card position of the card we are about to discard
|
|
if (_relative_representation.initialized) {
|
|
const Card discarded = _hands[_turn][index];
|
|
if (!discarded.initial_trash) {
|
|
if (discarded.in_starting_hand) {
|
|
ASSERT(_relative_representation.card_positions_hands[discarded.local_index] == true);
|
|
_relative_representation.card_positions_hands[discarded.local_index] = false;
|
|
} else {
|
|
auto replaced_card_it = std::ranges::find(_relative_representation.card_positions_draw[discarded.local_index], _turn);
|
|
ASSERT(replaced_card_it != _relative_representation.card_positions_draw[discarded.local_index].end());
|
|
*replaced_card_it = trash_or_play_stack;
|
|
std::ranges::sort(_relative_representation.card_positions_draw[discarded.local_index]);
|
|
}
|
|
}
|
|
}
|
|
|
|
// draw a new card if the draw pile is not empty
|
|
if (!_draw_pile.empty()) {
|
|
--_weighted_draw_pile_size;
|
|
|
|
const CardMultiplicity draw = _draw_pile.front();
|
|
_draw_pile.pop_front();
|
|
ASSERT(draw.multiplicity > 0);
|
|
|
|
if (draw.multiplicity > 1) {
|
|
_draw_pile.push_back(draw);
|
|
_draw_pile.back().multiplicity--;
|
|
}
|
|
|
|
if (_relative_representation.initialized) {
|
|
// update card position of the drawn card
|
|
if (!draw.card.initial_trash) {
|
|
ASSERT(draw.card.in_starting_hand == false);
|
|
auto new_card_it = std::ranges::find(_relative_representation.card_positions_draw[draw.card.local_index], draw_pile);
|
|
ASSERT(new_card_it != _relative_representation.card_positions_draw[draw.card.local_index].end());
|
|
*new_card_it = _turn;
|
|
std::ranges::sort(_relative_representation.card_positions_draw[draw.card.local_index]);
|
|
}
|
|
}
|
|
|
|
_hands[_turn][index] = draw.card;
|
|
|
|
if (_draw_pile.empty()) {
|
|
// Note the +1, since we will immediately decrement this when moving to the next player
|
|
_endgame_turns_left = num_players + 1;
|
|
}
|
|
return draw.multiplicity;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::revert_draw(std::uint8_t index, Card discarded_card, bool cycle) {
|
|
if (_endgame_turns_left == num_players + 1 || _endgame_turns_left == no_endgame) {
|
|
// Put the card that is currently in hand back into the draw pile
|
|
ASSERT(index < _hands[_turn].size());
|
|
const Card &drawn = _hands[_turn][index];
|
|
|
|
if (cycle)
|
|
{
|
|
// put discarded_card back into draw pile (at the back)
|
|
if (!_draw_pile.empty() and _draw_pile.back().card.suit == drawn.suit and
|
|
_draw_pile.back().card.rank == drawn.rank) {
|
|
_draw_pile.back().multiplicity++;
|
|
} else {
|
|
_draw_pile.push_back({drawn, 1});
|
|
}
|
|
} else
|
|
{
|
|
if (!_draw_pile.empty() and _draw_pile.front().card.suit == drawn.suit and
|
|
_draw_pile.front().card.rank == drawn.rank) {
|
|
_draw_pile.front().multiplicity++;
|
|
} else {
|
|
_draw_pile.push_front({drawn, 1});
|
|
}
|
|
}
|
|
|
|
if (_relative_representation.initialized && !drawn.initial_trash) {
|
|
ASSERT(drawn.in_starting_hand == false);
|
|
auto drawn_card_it = std::ranges::find(_relative_representation.card_positions_draw[drawn.local_index], _turn);
|
|
ASSERT(drawn_card_it != _relative_representation.card_positions_draw[drawn.local_index].end());
|
|
*drawn_card_it = draw_pile;
|
|
std::ranges::sort(_relative_representation.card_positions_draw[drawn.local_index]);
|
|
}
|
|
|
|
_weighted_draw_pile_size++;
|
|
_endgame_turns_left = no_endgame;
|
|
} else {
|
|
ASSERT(_hands[_turn][index] == discarded_card);
|
|
}
|
|
|
|
if (_relative_representation.initialized && !discarded_card.initial_trash) {
|
|
if (discarded_card.in_starting_hand) {
|
|
ASSERT(_relative_representation.card_positions_hands[discarded_card.local_index] == false);
|
|
_relative_representation.card_positions_hands[discarded_card.local_index] = true;
|
|
} else {
|
|
auto hand_card_it = std::ranges::find(_relative_representation.card_positions_draw[discarded_card.local_index],
|
|
trash_or_play_stack);
|
|
ASSERT(hand_card_it != _relative_representation.card_positions_draw[discarded_card.local_index].end());
|
|
*hand_card_it = _turn;
|
|
std::ranges::sort(_relative_representation.card_positions_draw[discarded_card.local_index]);
|
|
}
|
|
}
|
|
|
|
_hands[_turn][index] = discarded_card;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::init_backtracking_information() {
|
|
// Note that this function does not have to be particularly performant, we only call it once to initialize.
|
|
const Card trash = [this]() -> Card {
|
|
for (suit_t suit = 0; suit < num_suits; suit++) {
|
|
if (_stacks[suit] < starting_card_rank) {
|
|
return {suit, starting_card_rank - 1, 0, false, true};
|
|
}
|
|
}
|
|
return {0, 0};
|
|
}();
|
|
|
|
CardArray<num_suits, std::uint8_t> nums_in_draw_pile;
|
|
for (const auto [card, multiplicity]: _draw_pile) {
|
|
if (_stacks[card.suit] > card.rank) {
|
|
nums_in_draw_pile[card] += multiplicity;
|
|
} else {
|
|
nums_in_draw_pile[trash] += multiplicity;
|
|
}
|
|
}
|
|
|
|
// Prepare draw pile
|
|
_draw_pile.clear();
|
|
for (suit_t suit = 0; suit < num_suits; suit++) {
|
|
for (rank_t rank = 0; rank < starting_card_rank; rank++) {
|
|
Card card{suit, rank, static_cast<uint8_t>(_relative_representation.card_positions_draw.size()), false, is_trash(card)};
|
|
if (nums_in_draw_pile[card] > 0) {
|
|
_draw_pile.push_back({card, nums_in_draw_pile[card]});
|
|
if (!is_trash(card)) {
|
|
_relative_representation.card_positions_draw.push_back({});
|
|
_relative_representation.card_positions_draw.back().resize(nums_in_draw_pile[card], draw_pile);
|
|
_relative_representation.good_cards_draw.push_back(card);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
_relative_representation.initial_draw_pile_size = _weighted_draw_pile_size;
|
|
|
|
// Prepare cards in hands
|
|
for (player_t player = 0; player < num_players; player++) {
|
|
for (Card &card: _hands[player]) {
|
|
card.initial_trash = is_trash(card);
|
|
card.in_starting_hand = true;
|
|
// Needed to check for dupes in same hand
|
|
boost::container::static_vector<Card, hand_size> good_cards_in_hand;
|
|
if (!is_trash(card)) {
|
|
if (std::count(good_cards_in_hand.begin(), good_cards_in_hand.end(), card) > 0) {
|
|
// This card is already in hand, so just replace the second copy by some trash
|
|
card = trash;
|
|
} else {
|
|
card.local_index = _relative_representation.num_useful_cards_in_starting_hands;
|
|
_relative_representation.num_useful_cards_in_starting_hands++;
|
|
|
|
good_cards_in_hand.push_back(card);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
_relative_representation.card_positions_hands.reset();
|
|
for (size_t i = 0; i < _relative_representation.num_useful_cards_in_starting_hands; i++) {
|
|
_relative_representation.card_positions_hands[i] = true;
|
|
}
|
|
_relative_representation.initialized = true;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void
|
|
HanabiState<num_suits, num_players, hand_size>::revert_play(bool cycle) {
|
|
check_draw_pile_integrity();
|
|
const BacktrackAction last_action = _actions_log.top();
|
|
_actions_log.pop();
|
|
ASSERT(last_action.action_type == ActionType::play);
|
|
ASSERT(!last_action.was_on_8_clues or _num_clues == 8);
|
|
|
|
decr_turn();
|
|
if (last_action.discarded.rank == 0 and not last_action.was_on_8_clues) {
|
|
_num_clues--;
|
|
}
|
|
revert_draw(last_action.index, last_action.discarded, cycle);
|
|
if(_stacks[last_action.discarded.suit] == last_action.discarded.rank) {
|
|
_stacks[last_action.discarded.suit]++;
|
|
}
|
|
_score--;
|
|
check_draw_pile_integrity();
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::revert_discard(bool cycle) {
|
|
check_draw_pile_integrity();
|
|
const BacktrackAction last_action = _actions_log.top();
|
|
_actions_log.pop();
|
|
|
|
ASSERT(last_action.action_type == ActionType::discard);
|
|
|
|
decr_turn();
|
|
ASSERT(_num_clues > 0);
|
|
|
|
_num_clues--;
|
|
_pace++;
|
|
|
|
revert_draw(last_action.index, last_action.discarded, cycle);
|
|
check_draw_pile_integrity();
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::revert_clue() {
|
|
const BacktrackAction last_action = _actions_log.top();
|
|
_actions_log.pop();
|
|
|
|
ASSERT(last_action.action_type == ActionType::clue);
|
|
|
|
decr_turn();
|
|
ASSERT(_num_clues < max_num_clues);
|
|
|
|
_num_clues++;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::revert() {
|
|
switch(_actions_log.top().action_type) {
|
|
case ActionType::clue:
|
|
revert_clue();
|
|
break;
|
|
case ActionType::discard:
|
|
revert_discard();
|
|
break;
|
|
case ActionType::play:
|
|
revert_play();
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
}
|
|
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::modify_clues(Hanabi::clue_t change)
|
|
{
|
|
_num_clues += change;
|
|
if (_num_clues > 8) {
|
|
_num_clues = 8;
|
|
}
|
|
if (_num_clues < 0) {
|
|
_num_clues = 0;
|
|
}
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::set_clues(Hanabi::clue_t clues) {
|
|
assert(0 <= clues);
|
|
assert(clues <= 8);
|
|
_num_clues = clues;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
player_t HanabiState<num_suits, num_players, hand_size>::turn() const {
|
|
return _turn;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
clue_t HanabiState<num_suits, num_players, hand_size>::num_clues() const {
|
|
return _num_clues;
|
|
};
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
std::vector<std::vector<Card>> HanabiState<num_suits, num_players, hand_size>::hands() const {
|
|
std::vector<std::vector<Card>> hands;
|
|
for(player_t player = 0; player < num_players; player++) {
|
|
hands.push_back({});
|
|
for(const Card& card: _hands[player]) {
|
|
hands.back().push_back(card);
|
|
}
|
|
}
|
|
return hands;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
std::vector<Card> HanabiState<num_suits, num_players, hand_size>::cur_hand() const {
|
|
std::vector<Card> hand;
|
|
for(const Card& card: _hands[_turn]) {
|
|
hand.push_back(card);
|
|
}
|
|
return hand;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> HanabiState<num_suits, num_players, hand_size>::possible_next_states(hand_index_t index, bool play) {
|
|
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> next_states;
|
|
do_for_each_potential_draw(index, play, [this, &next_states, &index](unsigned multiplicity){
|
|
auto prob = lookup();
|
|
|
|
// bit hacky to get drawn card here
|
|
decr_turn();
|
|
const CardMultiplicity drawn_card = {_hands[_turn][index], multiplicity};
|
|
incr_turn();
|
|
|
|
next_states.emplace_back(drawn_card, prob);
|
|
});
|
|
return next_states;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
std::vector<std::pair<Action, std::optional<probability_t>>> HanabiState<num_suits, num_players, hand_size>::get_reasonable_actions() {
|
|
std::vector<std::pair<Action, std::optional<probability_t>>> reasonable_actions {};
|
|
|
|
if(_score == _score_goal or _pace < 0 or _endgame_turns_left == 0) {
|
|
return reasonable_actions;
|
|
}
|
|
|
|
const std::array<Card, hand_size>& hand = _hands[_turn];
|
|
// First, check for playable cards
|
|
for(std::uint8_t index = 0; index < hand_size; index++) {
|
|
if(is_playable(hand[index])) {
|
|
const Action action = {ActionType::play, hand[index]};
|
|
bool known = true;
|
|
probability_t sum_of_probabilities = 0;
|
|
|
|
do_for_each_potential_draw(index, true, [this, &sum_of_probabilities, &known](const unsigned long multiplicity){
|
|
const std::optional<probability_t> prob = lookup();
|
|
if (prob.has_value()) {
|
|
sum_of_probabilities += prob.value() * multiplicity;
|
|
} else {
|
|
known = false;
|
|
}
|
|
});
|
|
|
|
if (known) {
|
|
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
|
|
const probability_t probability_play = sum_of_probabilities / total_weight;
|
|
reasonable_actions.emplace_back(action, probability_play);
|
|
} else {
|
|
reasonable_actions.emplace_back(action, std::nullopt);
|
|
}
|
|
}
|
|
}
|
|
|
|
if(_pace > 0 and _num_clues < max_num_clues) {
|
|
for(std::uint8_t index = 0; index < hand_size; index++) {
|
|
if (is_trash(hand[index])) {
|
|
const Action action = {ActionType::discard, hand[index]};
|
|
bool known = true;
|
|
probability_t sum_of_probabilities = 0;
|
|
|
|
do_for_each_potential_draw(index, false, [this, &sum_of_probabilities, &known](const unsigned long multiplicity){
|
|
const std::optional<probability_t> prob = lookup();
|
|
if (prob.has_value()) {
|
|
sum_of_probabilities += prob.value() * multiplicity;
|
|
} else {
|
|
known = false;
|
|
}
|
|
});
|
|
|
|
if (known) {
|
|
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
|
|
const probability_t probability_discard = sum_of_probabilities / total_weight;
|
|
reasonable_actions.emplace_back(action, probability_discard);
|
|
} else {
|
|
reasonable_actions.emplace_back(action, std::nullopt);
|
|
}
|
|
|
|
// All discards are equivalent, do not continue searching for different trash
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(_num_clues > 0) {
|
|
give_clue();
|
|
const std::optional<probability_t> prob = lookup();
|
|
const Action action = {ActionType::clue, Cards::unknown};
|
|
reasonable_actions.emplace_back(action, prob);
|
|
revert_clue();
|
|
}
|
|
return reasonable_actions;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
std::optional<probability_t> HanabiState<num_suits, num_players, hand_size>::lookup() const {
|
|
if (_score == 5 * num_suits) {
|
|
return 1;
|
|
}
|
|
if (_pace < 0 or _endgame_turns_left == 0) {
|
|
return 0;
|
|
}
|
|
const auto id = unique_id();
|
|
if(_position_tablebase.contains(id)) {
|
|
return _position_tablebase.at(id);
|
|
} else {
|
|
return std::nullopt;
|
|
}
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::rotate_next_draw(const Card& card) {
|
|
auto card_it = std::find_if(_draw_pile.begin(), _draw_pile.end(), [&card, this](const CardMultiplicity& card_multiplicity){
|
|
return (is_trash(card) and is_trash(card_multiplicity.card)) or (card_multiplicity.card.rank == card.rank and card_multiplicity.card.suit == card.suit);
|
|
});
|
|
ASSERT(card_it != _draw_pile.end());
|
|
std::swap(*card_it, _draw_pile.front());
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
ActionType HanabiState<num_suits, num_players, hand_size>::last_action_type() const
|
|
{
|
|
assert(not _actions_log.empty());
|
|
return _actions_log.top().action_type;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
probability_t HanabiState<num_suits, num_players, hand_size>::evaluate_state() {
|
|
ASSERT(_relative_representation.initialized);
|
|
_enumerated_states++;
|
|
const unsigned long id_of_state = unique_id();
|
|
|
|
if (_score == _score_goal) {
|
|
return 1;
|
|
}
|
|
if(_pace < 0 || _endgame_turns_left == 0) {
|
|
return 0;
|
|
}
|
|
if (_position_tablebase.contains(id_of_state)) {
|
|
return _position_tablebase[id_of_state];
|
|
}
|
|
|
|
// TODO: Have some endgame analysis here?
|
|
|
|
probability_t best_probability = 0;
|
|
const std::array<Card, hand_size>& hand = _hands[_turn];
|
|
|
|
// First, check for playables
|
|
for(std::uint8_t index = 0; index < hand_size; index++) {
|
|
if(is_playable(hand[index])) {
|
|
probability_t sum_of_probabilities = 0;
|
|
|
|
do_for_each_potential_draw(index, true, [this, &sum_of_probabilities](const unsigned long multiplicity){
|
|
sum_of_probabilities += evaluate_state() * multiplicity;
|
|
});
|
|
|
|
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
|
|
const probability_t probability_play = sum_of_probabilities / total_weight;
|
|
|
|
best_probability = std::max(best_probability, probability_play);
|
|
if (best_probability == 1) {
|
|
update_tablebase(id_of_state, best_probability);
|
|
return best_probability;
|
|
};
|
|
}
|
|
}
|
|
|
|
// Check for discards now
|
|
if(_pace > 0 and _num_clues < max_num_clues) {
|
|
for(std::uint8_t index = 0; index < hand_size; index++) {
|
|
if (is_trash(hand[index])) {
|
|
probability_t sum_of_probabilities = 0;
|
|
|
|
do_for_each_potential_draw(index, false, [this, &sum_of_probabilities](const unsigned long multiplicity){
|
|
sum_of_probabilities += evaluate_state() * multiplicity;
|
|
});
|
|
|
|
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
|
|
const probability_t probability_discard = sum_of_probabilities / total_weight;
|
|
best_probability = std::max(best_probability, probability_discard);
|
|
|
|
best_probability = std::max(best_probability, probability_discard);
|
|
if (best_probability == 1) {
|
|
update_tablebase(id_of_state, best_probability);
|
|
return best_probability;
|
|
};
|
|
|
|
// All discards are equivalent, do not continue searching for different trash
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Last option is to stall
|
|
if(_num_clues > 0) {
|
|
give_clue();
|
|
const probability_t probability_stall = evaluate_state();
|
|
revert_clue();
|
|
best_probability = std::max(best_probability, probability_stall);
|
|
if (best_probability == 1) {
|
|
update_tablebase(id_of_state, best_probability);
|
|
return best_probability;
|
|
};
|
|
}
|
|
|
|
update_tablebase(id_of_state, best_probability);
|
|
return best_probability;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
template<class Function>
|
|
void HanabiState<num_suits, num_players, hand_size>::do_for_each_potential_draw(hand_index_t index, bool play, Function f) {
|
|
auto copy = _draw_pile;
|
|
auto do_action = [this, index, play](){
|
|
if (play) {
|
|
return play_and_potentially_update(index);
|
|
} else {
|
|
return discard_and_potentially_update(index);
|
|
}
|
|
};
|
|
|
|
auto revert_action = [this, play](){
|
|
if (play) {
|
|
revert_play(true);
|
|
} else {
|
|
revert_discard(true);
|
|
}
|
|
};
|
|
|
|
if(_draw_pile.empty()) {
|
|
do_action();
|
|
f(1);
|
|
revert_action();
|
|
} else {
|
|
unsigned sum_of_multiplicities = 0;
|
|
[[maybe_unused]] const auto pile = _draw_pile;
|
|
for(size_t i = 0; i < _draw_pile.size(); i++) {
|
|
const unsigned long multiplicity = do_action();
|
|
sum_of_multiplicities += multiplicity;
|
|
f(multiplicity);
|
|
revert_action();
|
|
}
|
|
ASSERT(sum_of_multiplicities == _weighted_draw_pile_size);
|
|
}
|
|
ASSERT(_draw_pile == copy);
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
std::uint64_t HanabiState<num_suits, num_players, hand_size>::unique_id() const {
|
|
unsigned long id = 0;
|
|
|
|
// encode all positions of cards that started in draw pile
|
|
ASSERT(_relative_representation.card_positions_draw.size() == _relative_representation.good_cards_draw.size());
|
|
for(size_t i = 0; i < _relative_representation.card_positions_draw.size(); i++) {
|
|
for(player_t player : _relative_representation.card_positions_draw[i]) {
|
|
id *= num_players + 2;
|
|
// We normalize here: If a card is already played, then the positions of its other copies
|
|
// do not matter, so we can just pretend that they are all in the trash already.
|
|
// The resulting states will be equivalent.
|
|
if (!is_trash(_relative_representation.good_cards_draw[i])) {
|
|
id += player;
|
|
} else {
|
|
id += trash_or_play_stack;
|
|
}
|
|
}
|
|
}
|
|
|
|
// encode number of clues
|
|
id *= max_num_clues + 1;
|
|
id += _num_clues;
|
|
|
|
// we can encode draw pile size and extra turn in one metric, since we only have extra turns if draw pile is empty
|
|
const std::uint8_t draw_pile_size_and_extra_turns = [this]() -> uint8_t {
|
|
if(_endgame_turns_left == no_endgame) {
|
|
return _weighted_draw_pile_size + num_players;
|
|
}
|
|
else {
|
|
return _endgame_turns_left;
|
|
}
|
|
}();
|
|
|
|
id *= _relative_representation.initial_draw_pile_size + num_players;
|
|
id += draw_pile_size_and_extra_turns;
|
|
|
|
// encode positions of cards that started in hands
|
|
id = id << _relative_representation.num_useful_cards_in_starting_hands;
|
|
id += _relative_representation.card_positions_hands.to_ulong();
|
|
|
|
id *= num_players;
|
|
id += _turn;
|
|
|
|
// The id is unique now, since for all relevant cards, we know their position (including if they are played),
|
|
// the number of clues, the draw pile size and whose turn it is.
|
|
// This already uniquely determines the current players position, assuming that we never discard good cards
|
|
// (and only play them)
|
|
|
|
return id;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
std::pair<std::vector<std::uint64_t>, std::vector<Card>> HanabiState<num_suits, num_players, hand_size>::dump_unique_id_parts() const {
|
|
std::vector<std::uint64_t> ret;
|
|
std::vector<Card> cards;
|
|
|
|
// encode all positions of cards that started in draw pile
|
|
ASSERT(_relative_representation.card_positions_draw.size() == _relative_representation.good_cards_draw.size());
|
|
for(size_t i = 0; i < _relative_representation.card_positions_draw.size(); i++) {
|
|
for(player_t player : _relative_representation.card_positions_draw[i]) {
|
|
// We normalize here: If a card is already played, then the positions of its other copies
|
|
// do not matter, so we can just pretend that they are all in the trash already.
|
|
// The resulting states will be equivalent.
|
|
if (!is_trash(_relative_representation.good_cards_draw[i])) {
|
|
ret.push_back(player);
|
|
} else {
|
|
ret.push_back(trash_or_play_stack);
|
|
}
|
|
cards.push_back(_relative_representation.good_cards_draw[i]);
|
|
}
|
|
}
|
|
|
|
// encode number of clues
|
|
ret.push_back(_num_clues);
|
|
|
|
// we can encode draw pile size and extra turn in one metric, since we only have extra turns if draw pile is empty
|
|
const std::uint8_t draw_pile_size_and_extra_turns = [this]() -> uint8_t {
|
|
if(_endgame_turns_left == no_endgame) {
|
|
return _weighted_draw_pile_size + num_players;
|
|
}
|
|
else {
|
|
return _endgame_turns_left;
|
|
}
|
|
}();
|
|
|
|
ret.push_back(draw_pile_size_and_extra_turns);
|
|
|
|
// encode positions of cards that started in hands
|
|
ret.push_back(_relative_representation.card_positions_hands.to_ulong());
|
|
|
|
ret.push_back(_turn);
|
|
|
|
// The id is unique now, since for all relevant cards, we know their position (including if they are played),
|
|
// the number of clues, the draw pile size and whose turn it is.
|
|
// This already uniquely determines the current players position, assuming that we never discard good cards
|
|
// (and only play them)
|
|
|
|
return {ret, cards};
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
const std::unordered_map<unsigned long, probability_t>& HanabiState<num_suits, num_players, hand_size>::position_tablebase() const {
|
|
return _position_tablebase;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
size_t HanabiState<num_suits, num_players, hand_size>::draw_pile_size() const {
|
|
return _weighted_draw_pile_size;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
bool HanabiState<num_suits, num_players, hand_size>::is_relative_state_initialized() const {
|
|
return _relative_representation.initialized;
|
|
}
|
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::update_tablebase(
|
|
unsigned long id,
|
|
Hanabi::probability_t probability) {
|
|
if (_position_tablebase.contains(id)) {
|
|
ASSERT(_position_tablebase[id] == probability);
|
|
}
|
|
_position_tablebase[id] = probability;
|
|
}
|
|
|
|
} // namespace Hanabi
|