Endgame-Analyzer/include/game_state.hpp

998 lines
40 KiB
C++
Raw Normal View History

2023-08-04 16:28:41 +02:00
#include <algorithm>
2023-11-11 11:44:37 +01:00
#include <iostream>
2023-08-12 19:43:22 +02:00
2023-08-06 15:02:50 +02:00
#include "myassert.h"
#include "game_state.h"
2023-08-04 16:28:41 +02:00
2023-08-05 11:55:46 +02:00
namespace Hanabi {
2023-08-04 16:28:41 +02:00
// Prints an optional probability: the contained value is forwarded to the matching
// print_probability overload, an empty optional is printed as the literal "unknown".
template<typename T>
std::ostream& print_probability(std::ostream& os, const std::optional<T>& prob) {
  if (not prob.has_value()) {
    os << "unknown";
    return os;
  }
  return print_probability(os, prob.value());
}
// Prints an exact rational probability followed by its approximate percentage,
// e.g. "<rational> ~ <percent>%". Note that the precision of 5 set on the stream persists
// after this call.
std::ostream& print_probability(std::ostream& os, const rational_probability & prob) {
  const double percentage = boost::rational_cast<double>(prob) * 100;
  os << prob << " ~ ";
  os << std::setprecision(5) << percentage << "%";
  return os;
}
// Prints a floating point probability with 5 significant digits.
// The precision set on the stream persists after this call.
std::ostream& print_probability(std::ostream& os, double prob) {
  return os << std::setprecision(5) << prob;
}
// Streams a game state by delegating to its print() member.
std::ostream &operator<<(std::ostream &os, HanabiStateIF const &hanabi_state)
{
  hanabi_state.print(os);
  return os;
}
// Human readable card name: the suit initial followed by (5 - rank), or "kt" for the
// special trash card.
std::string to_string(const Hanabi::Card &card) {
  if (not (card == Hanabi::Cards::trash)) {
    return Hanabi::suit_initials[card.suit] + std::to_string(5 - card.rank);
  }
  return "kt";
}
2023-08-11 16:54:11 +02:00
// Streams an action: "play <card>", "discard" or "clue".
// Any other action type prints nothing.
std::ostream &operator<<(std::ostream &os, Action const& action) {
  if (action.type == ActionType::play) {
    os << "play " + to_string(action.card);
  } else if (action.type == ActionType::discard) {
    os << "discard";
  } else if (action.type == ActionType::clue) {
    os << "clue";
  }
  return os;
}
// Two cards are equal iff suit and rank agree; all other card fields are ignored.
bool Card::operator==(const Card &other) const {
  return rank == other.rank and suit == other.suit;
}
// Streams the textual representation produced by to_string(card).
std::ostream &operator<<(std::ostream &os, const Card &card) {
  return os << to_string(card);
}
2023-08-07 10:45:11 +02:00
template<size_t num_suits>
std::ostream &operator<<(std::ostream &os, const Stacks<num_suits> &stacks) {
2023-08-12 11:09:06 +02:00
for (size_t i = 0; i < stacks.size(); i++) {
os << suit_initials[i] << starting_card_rank - stacks[i];
if(i < stacks.size() - 1) {
os << ", ";
}
2023-08-05 11:55:46 +02:00
}
return os;
}
template<suit_t num_suits, typename T>
2023-08-07 01:51:24 +02:00
void CardArray<num_suits, T>::fill(T val) {
for (size_t suit = 0; suit < num_suits; suit++) {
for (rank_t rank = 0; rank < starting_card_rank; rank++) {
2023-08-07 01:51:24 +02:00
_array[suit][rank] = val;
2023-08-05 12:19:34 +02:00
}
}
}
2023-08-07 01:51:24 +02:00
// Constructs the array with every entry set to default_val.
template<suit_t num_suits, typename T>
CardArray<num_suits, T>::CardArray(T default_val) {
  this->fill(default_val);
}
// Read-only access to the entry stored for the given card (indexed by suit, then rank).
template<suit_t num_suits, typename T>
const T &CardArray<num_suits, T>::operator[](const Card &card) const {
  const auto &entries_of_suit = _array[card.suit];
  return entries_of_suit[card.rank];
}
// Mutable access to the entry stored for the given card (indexed by suit, then rank).
template<suit_t num_suits, typename T>
T &CardArray<num_suits, T>::operator[](const Card &card) {
  auto &entries_of_suit = _array[card.suit];
  return entries_of_suit[card.rank];
}
// Record of one performed action, holding everything needed to undo it later:
// the action type, the card that left the hand, its hand index, whether the clue count
// was 8 at the time, and a snapshot of the draw pile (taken over by move).
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
HanabiState<num_suits, num_players, hand_size>::BacktrackAction::BacktrackAction(
    Hanabi::ActionType action_type,
    Hanabi::Card discarded_or_played,
    Hanabi::hand_index_t index,
    bool was_on_8_clues,
    std::list<CardMultiplicity> draw_pile
)
  : action_type(action_type)
  , discarded(discarded_or_played)
  , index(index)
  , was_on_8_clues(was_on_8_clues)
  , draw_pile(std::move(draw_pile)) {
}
// Builds the initial position from an ordered deck: all stacks are empty, the clue count is
// at its maximum, and every player is dealt hand_size cards from the front of the deck.
// Pace is initialised as deck size - score goal - num_players * (hand_size - 1).
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> &deck, uint8_t score_goal):
  _turn(0),
  _num_clues(max_num_clues),
  _weighted_draw_pile_size(deck.size()),
  _stacks(),
  _hands(),
  _draw_pile(),
  _endgame_turns_left(no_endgame),
  _pace(deck.size() - score_goal - num_players * (hand_size - 1)),
  _score(0),
  _score_goal(score_goal),
  _actions_log(),
  _relative_representation(),
  _position_tablebase(),
  _enumerated_states(0) {
  // No card has been played on any stack yet
  std::ranges::fill(_stacks, starting_card_rank);

  // Each deck card becomes its own draw pile entry with multiplicity 1
  for (const Card &deck_card: deck) {
    _draw_pile.push_back({deck_card, 1});
  }

  // Deal: the player to move fills all hand slots, then the turn is passed on
  for (player_t dealt_to = 0; dealt_to < num_players; ++dealt_to) {
    for (std::uint8_t slot = 0; slot < hand_size; ++slot) {
      draw(slot);
    }
    incr_turn();
  }

  // After one full round of dealing the first player is to move again
  ASSERT(_turn == 0);
}
// Spends one clue, logs the action for backtracking and passes the turn.
// Requires at least one clue to be available.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::give_clue() {
  ASSERT(_num_clues > 0);
  _num_clues--;

  // A clue does not involve a card, so a placeholder card and index 0 are logged
  _actions_log.emplace(ActionType::clue, Cards::unknown, 0);

  incr_turn();
}
// Passes the turn to the next player (cyclically). Once the endgame countdown is running,
// one of the remaining turns is consumed as well.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::incr_turn() {
  _turn = (_turn + 1) % num_players;
  if (_endgame_turns_left == no_endgame) {
    return;
  }
  --_endgame_turns_left;
}
// Inverse of incr_turn(): hands the turn back to the previous player (cyclically) and,
// if the endgame countdown is running, gives back one remaining turn.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::decr_turn() {
  _turn = (_turn + num_players - 1) % num_players;
  if (_endgame_turns_left == no_endgame) {
    return;
  }
  ++_endgame_turns_left;
}
2023-11-11 11:44:37 +01:00
// Debug check: once the relative representation is initialized, no card (suit/rank) may
// appear in more than one draw pile entry. Works on a sorted copy of the pile, the pile
// itself is left untouched.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::check_draw_pile_integrity() const
{
  // Nothing to verify before initialization, or with fewer than two entries
  if (not _relative_representation.initialized or _draw_pile.size() < 2) {
    return;
  }

  // Sort by rank first, then suit, so that equal cards end up adjacent
  auto sorted_pile = _draw_pile;
  sorted_pile.sort([](CardMultiplicity const & lhs, CardMultiplicity const & rhs){
    if (lhs.card.rank != rhs.card.rank) {
      return lhs.card.rank < rhs.card.rank;
    }
    return lhs.card.suit < rhs.card.suit;
  });

  // Compare every entry with its successor
  for (auto prev = sorted_pile.begin(), cur = std::next(prev); cur != sorted_pile.end(); ++prev, ++cur) {
    ASSERT(prev->card != cur->card);
  }
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
2023-08-06 11:54:57 +02:00
bool HanabiState<num_suits, num_players, hand_size>::is_playable(const Hanabi::Card &card) const {
return card.rank == _stacks[card.suit] - 1;
}
// Number of evaluate_state() invocations counted so far.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::enumerated_states() const
{
  return _enumerated_states;
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
2023-08-06 11:54:57 +02:00
bool HanabiState<num_suits, num_players, hand_size>::is_trash(const Hanabi::Card &card) const {
return card.rank >= _stacks[card.suit];
2023-08-05 11:55:46 +02:00
}
// Plays the card at the given hand index of the current player; the multiplicity reported
// by the underlying implementation is discarded.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::play(Hanabi::hand_index_t index) {
  static_cast<void>(play_and_potentially_update(index));
}
// Plays the card at `index` of the current player's hand, draws a replacement and passes
// the turn.
//
// index: hand slot of the card to play (must be a valid slot of the current hand)
// cycle: forwarded to draw(); selects whether remaining copies of the drawn card go to the
//        back (true) or stay at the front (false) of the draw pile
// Returns the multiplicity reported by draw() for the replacement card.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned long HanabiState<num_suits, num_players, hand_size>::play_and_potentially_update(hand_index_t index, bool cycle) {
  check_draw_pile_integrity();
  ASSERT(index < _hands[_turn].size());
  const Card played_card = _hands[_turn][index];

  if (!is_playable(played_card)) {
    // The card is only replaced by a fresh draw; stacks, score and clues stay untouched.
    // NOTE(review): this path is not recorded in _actions_log, so revert() will not undo
    // it -- confirm that callers only use it for forward replay.
    const unsigned long multiplicity = draw(index, cycle);
    incr_turn();
    return multiplicity;
  }

  // Log the action together with a snapshot of the draw pile *before* drawing. The pile is
  // copied exactly once, directly into the by-value constructor parameter of the log entry.
  // Whether the clue count was already 8 is stored so that revert_play() knows if this play
  // could have gained a clue.
  _actions_log.emplace(ActionType::play, played_card, index, _num_clues == 8, _draw_pile);

  --_stacks[played_card.suit];
  _score++;

  if (played_card.rank == 0 and _num_clues < max_num_clues) {
    // update clues if we played the last played_card of a stack
    _num_clues++;
  }

  const unsigned long multiplicity = draw(index, cycle);

  incr_turn();
  check_draw_pile_integrity();
  return multiplicity;
}
// Discards the card at the given hand index of the current player; the multiplicity
// reported by the underlying implementation is discarded.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::discard(hand_index_t index) {
  static_cast<void>(discard_and_potentially_update(index));
}
// Discards the card at `index` of the current player's hand, draws a replacement and passes
// the turn. Gains one clue and costs one pace.
//
// index: hand slot of the card to discard (must be a valid slot of the current hand)
// cycle: forwarded to draw(); selects whether remaining copies of the drawn card go to the
//        back (true) or stay at the front (false) of the draw pile
// Returns the multiplicity reported by draw() for the replacement card.
// Must not be called when the clue count is at its maximum.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned long HanabiState<num_suits, num_players, hand_size>::discard_and_potentially_update(hand_index_t index, bool cycle) {
  check_draw_pile_integrity();
  ASSERT(index < _hands[_turn].size());
  ASSERT(_num_clues != max_num_clues);

  const Card discarded_card = _hands[_turn][index];

  // Log the action together with a snapshot of the draw pile *before* drawing. The pile is
  // copied exactly once, directly into the by-value constructor parameter of the log entry.
  _actions_log.emplace(ActionType::discard, discarded_card, index, false, _draw_pile);

  _num_clues++;
  _pace--;

  const unsigned long multiplicity = draw(index, cycle);

  incr_turn();
  check_draw_pile_integrity();
  return multiplicity;
}
// Returns the first hand index of the current player holding a card with the same suit and
// rank as `card`. If there is none, -1 is returned, which converts to the largest value of
// the unsigned return type.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint8_t HanabiState<num_suits, num_players, hand_size>::find_card_in_hand(
    const Hanabi::Card &card) const {
  const auto &hand = _hands[_turn];
  for (std::uint8_t slot = 0; slot < hand_size; ++slot) {
    const Card &candidate = hand[slot];
    if (candidate.rank == card.rank && candidate.suit == card.suit) {
      return slot;
    }
  }
  return -1;
}
// Writes a three line summary of the state: stacks/score/clues/turn (plus remaining turns
// during the endgame countdown), the draw pile, and all hands.
// In the draw pile all trash cards are summed up into a single trash entry.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::print(std::ostream &os) const {
  // Line 1: general information. The unary + prints small integer types as numbers.
  os << "Stacks: " << _stacks << " (score " << +_score << ")"
     << ", clues: " << +_num_clues << ", turn: " << +_turn;
  if (_endgame_turns_left != no_endgame) {
    os << ", " << +_endgame_turns_left << " turns left";
  }
  os << std::endl;

  // Line 2: draw pile, useful cards individually, trash accumulated
  os << "Draw pile: ";
  unsigned trash_count = 0;
  for (const auto &[pile_card, copies]: _draw_pile) {
    if (not is_trash(pile_card)) {
      os << pile_card;
      if (copies > 1) {
        os << " (" << +copies << ")";
      }
      os << ", ";
    } else {
      trash_count += copies;
    }
  }
  if (trash_count > 0) {
    os << Cards::trash << " (" << trash_count << ") ";
  }
  os << "[size " << +_weighted_draw_pile_size << "]" << std::endl;

  // Line 3: hands, cards of one hand separated by single spaces
  os << "Hands: ";
  for (const auto &hand: _hands) {
    os << "[";
    for (hand_index_t slot = 0; slot < hand.size(); ++slot) {
      if (slot > 0) {
        os << " ";
      }
      os << hand[slot];
    }
    os << "] ";
  }
}
// Replaces the card at `index` in the current player's hand by the front card of the draw
// pile (if the pile is not empty) and keeps the relative representation in sync.
//
// index: hand slot to be refilled
// cycle: if the drawn entry has further copies, they are re-inserted at the back of the
//        pile when true and at the front when false
// Returns the multiplicity the drawn entry had before drawing, or 1 if the pile was empty
// and nothing was drawn.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
unsigned HanabiState<num_suits, num_players, hand_size>::draw(uint8_t index, bool cycle) {
  ASSERT(index < _hands[_turn].size());

  // The card currently in this slot leaves the hand: update its tracked position
  if (_relative_representation.initialized) {
    const Card leaving = _hands[_turn][index];
    if (!leaving.initial_trash) {
      if (leaving.in_starting_hand) {
        ASSERT(_relative_representation.card_positions_hands[leaving.local_index] == true);
        _relative_representation.card_positions_hands[leaving.local_index] = false;
      } else {
        auto &positions = _relative_representation.card_positions_draw[leaving.local_index];
        auto leaving_it = std::ranges::find(positions, _turn);
        ASSERT(leaving_it != positions.end());
        *leaving_it = trash_or_play_stack;
        // Positions of the copies of one card are kept sorted
        std::ranges::sort(positions);
      }
    }
  }

  // Nothing left to draw
  if (_draw_pile.empty()) {
    return 1;
  }

  --_weighted_draw_pile_size;
  const CardMultiplicity drawn = _draw_pile.front();
  _draw_pile.pop_front();
  ASSERT(drawn.multiplicity > 0);

  // Put the remaining copies (if any) back into the pile
  if (drawn.multiplicity > 1) {
    CardMultiplicity remaining = drawn;
    remaining.multiplicity--;
    if (cycle) {
      _draw_pile.push_back(remaining);
    } else {
      _draw_pile.push_front(remaining);
    }
  }

  // update card position of the drawn card
  if (_relative_representation.initialized and !drawn.card.initial_trash) {
    ASSERT(drawn.card.in_starting_hand == false);
    auto &positions = _relative_representation.card_positions_draw[drawn.card.local_index];
    auto drawn_it = std::ranges::find(positions, draw_pile);
    ASSERT(drawn_it != positions.end());
    *drawn_it = _turn;
    std::ranges::sort(positions);
  }

  _hands[_turn][index] = drawn.card;

  if (_draw_pile.empty()) {
    // Note the +1, since we will immediately decrement this when moving to the next player
    _endgame_turns_left = num_players + 1;
  }
  return drawn.multiplicity;
}
// Undoes a draw(): puts the card currently at `index` back into the draw pile (if one was
// drawn at all) and restores `discarded_card` into that hand slot, keeping the relative
// representation in sync. Expects _turn to already point to the player who drew.
//
// index:          hand slot that was refilled
// discarded_card: the card that occupied the slot before the draw
// cycle:          must match the mode used for the draw; true re-inserts at the back of
//                 the pile, false searches the pile for an existing entry first
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_draw(std::uint8_t index, Card discarded_card, bool cycle) {
  // A card was drawn iff the pile is still non-empty (no countdown running) or the countdown
  // is at the value draw() sets when the pile becomes empty
  const bool a_card_was_drawn =
      _endgame_turns_left == num_players + 1 || _endgame_turns_left == no_endgame;

  if (not a_card_was_drawn) {
    // The slot was never refilled, so it still has to hold the discarded card
    ASSERT(_hands[_turn][index] == discarded_card);
  } else {
    // Put the card that is currently in hand back into the draw pile
    ASSERT(index < _hands[_turn].size());
    const Card &drawn = _hands[_turn][index];

    if (not cycle) {
      // We don't know where the card came from (between the card having been removed from the draw pile
      // and re-adding it now, the user may have arbitrarily permuted the draw pile implicitly)
      // so we have to check if it is already contained in the draw pile somewhere
      auto existing_entry = std::find_if(
          _draw_pile.begin(), _draw_pile.end(),
          [&drawn](CardMultiplicity const & entry){ return entry.card == drawn; }
      );
      if (existing_entry == _draw_pile.end()) {
        _draw_pile.push_front({drawn, 1});
      } else {
        existing_entry->multiplicity++;
      }
    } else {
      // Re-insert at the back, merging with the last entry if it is the same card
      const bool back_is_same_card = !_draw_pile.empty()
          and _draw_pile.back().card.suit == drawn.suit
          and _draw_pile.back().card.rank == drawn.rank;
      if (back_is_same_card) {
        _draw_pile.back().multiplicity++;
      } else {
        _draw_pile.push_back({drawn, 1});
      }
    }

    // The drawn card moves from the current player's hand back to the draw pile
    if (_relative_representation.initialized && !drawn.initial_trash) {
      ASSERT(drawn.in_starting_hand == false);
      auto &positions = _relative_representation.card_positions_draw[drawn.local_index];
      auto drawn_it = std::ranges::find(positions, _turn);
      ASSERT(drawn_it != positions.end());
      *drawn_it = draw_pile;
      std::ranges::sort(positions);
    }

    _weighted_draw_pile_size++;
    _endgame_turns_left = no_endgame;
  }

  // The discarded/played card returns to the current player's hand
  if (_relative_representation.initialized && !discarded_card.initial_trash) {
    if (discarded_card.in_starting_hand) {
      ASSERT(_relative_representation.card_positions_hands[discarded_card.local_index] == false);
      _relative_representation.card_positions_hands[discarded_card.local_index] = true;
    } else {
      auto &positions = _relative_representation.card_positions_draw[discarded_card.local_index];
      auto returning_it = std::ranges::find(positions, trash_or_play_stack);
      ASSERT(returning_it != positions.end());
      *returning_it = _turn;
      std::ranges::sort(positions);
    }
  }

  _hands[_turn][index] = discarded_card;
}
// Sets up the relative representation used for backtracking and for unique_id():
//  - the draw pile is rebuilt with one entry per distinct card (with its multiplicity),
//    where all cards that are already trash are merged into one representative trash card,
//  - every useful card of the draw pile and of the starting hands gets a local index,
//  - a second copy of a useful card within the same hand is replaced by trash.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::init_backtracking_information() {
  // Note that this function does not have to be particularly performant, we only call it once to initialize.

  // Representative trash card: the first card of any suit whose stack has been started.
  // If no stack has been started, there is no trash yet and {0, 0} is only a placeholder.
  const Card trash = [this]() -> Card {
    for (suit_t suit = 0; suit < num_suits; suit++) {
      if (_stacks[suit] < starting_card_rank) {
        return {suit, starting_card_rank - 1, 0, false, true};
      }
    }
    return {0, 0};
  }();
  // False exactly for the placeholder above, which must never be substituted into a hand
  const bool trash_exists = is_trash(trash);

  // Count the copies of each card in the draw pile (explicitly starting from zero),
  // attributing all trash cards to the representative trash card
  CardArray<num_suits, std::uint8_t> nums_in_draw_pile(0);
  for (const auto &[card, multiplicity]: _draw_pile) {
    if (_stacks[card.suit] > card.rank) {
      nums_in_draw_pile[card] += multiplicity;
    } else {
      nums_in_draw_pile[trash] += multiplicity;
    }
  }

  // Prepare draw pile
  _draw_pile.clear();
  for (suit_t suit = 0; suit < num_suits; suit++) {
    for (rank_t rank = 0; rank < starting_card_rank; rank++) {
      // Same predicate as is_trash(), evaluated up front so that the card does not have to
      // be inspected while it is still being initialized
      const bool card_is_trash = rank >= _stacks[suit];
      Card card{suit, rank, static_cast<uint8_t>(_relative_representation.card_positions_draw.size()), false, card_is_trash};
      if (nums_in_draw_pile[card] > 0) {
        _draw_pile.push_back({card, nums_in_draw_pile[card]});
        if (!card_is_trash) {
          // All copies of this useful card start out in the draw pile
          _relative_representation.card_positions_draw.push_back({});
          _relative_representation.card_positions_draw.back().resize(nums_in_draw_pile[card], draw_pile);
          _relative_representation.good_cards_draw.push_back(card);
        }
      }
    }
  }
  _relative_representation.initial_draw_pile_size = _weighted_draw_pile_size;

  // Prepare cards in hands
  for (player_t player = 0; player < num_players; player++) {
    // Useful cards seen so far in *this* hand, needed to check for dupes in same hand.
    // It has to live across the cards of one hand, otherwise no duplicate is ever found.
    boost::container::static_vector<Card, hand_size> good_cards_in_hand;
    for (Card &card: _hands[player]) {
      card.initial_trash = is_trash(card);
      card.in_starting_hand = true;
      if (is_trash(card)) {
        continue;
      }
      const bool already_in_hand =
          std::count(good_cards_in_hand.begin(), good_cards_in_hand.end(), card) > 0;
      if (already_in_hand and trash_exists) {
        // This card is already in hand, so just replace the second copy by some trash
        card = trash;
      } else {
        card.local_index = _relative_representation.num_useful_cards_in_starting_hands;
        _relative_representation.num_useful_cards_in_starting_hands++;
        good_cards_in_hand.push_back(card);
      }
    }
  }

  // All useful starting hand cards are initially still in hand
  _relative_representation.card_positions_hands.reset();
  for (size_t i = 0; i < _relative_representation.num_useful_cards_in_starting_hands; i++) {
    _relative_representation.card_positions_hands[i] = true;
  }
  _relative_representation.initialized = true;
}
// Undoes the most recent action, which must be a play: restores turn, clue count, hand,
// draw pile, stack and score.
//
// cycle: must match the mode the play was performed with; in non-cycling mode the draw pile
//        is additionally checked against the snapshot stored in the log
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void
HanabiState<num_suits, num_players, hand_size>::revert_play(bool cycle) {
  check_draw_pile_integrity();

  const BacktrackAction last_action = _actions_log.top();
  _actions_log.pop();
  ASSERT(last_action.action_type == ActionType::play);
  ASSERT(!last_action.was_on_8_clues or _num_clues == 8);

  const Card &played = last_action.discarded;

  decr_turn();

  // Take back the clue gained for finishing a stack, unless the count was already at 8
  if (played.rank == 0 and not last_action.was_on_8_clues) {
    _num_clues--;
  }

  revert_draw(last_action.index, played, cycle);

  // Remove the card from its stack again
  if (_stacks[played.suit] == played.rank) {
    _stacks[played.suit]++;
  }
  _score--;

  ASSERT(cycle or last_action.draw_pile == _draw_pile);
  check_draw_pile_integrity();
}
// Undoes the most recent action, which must be a discard: restores turn, clue count, pace,
// hand and draw pile.
//
// cycle: must match the mode the discard was performed with; in non-cycling mode the draw
//        pile is additionally checked against the snapshot stored in the log
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_discard(bool cycle) {
  check_draw_pile_integrity();

  const BacktrackAction last_action = _actions_log.top();
  _actions_log.pop();
  ASSERT(last_action.action_type == ActionType::discard);

  decr_turn();

  // Give back the clue gained and the pace lost by the discard
  ASSERT(_num_clues > 0);
  _num_clues--;
  _pace++;

  revert_draw(last_action.index, last_action.discarded, cycle);

  ASSERT(cycle or last_action.draw_pile == _draw_pile);
  check_draw_pile_integrity();
}
// Undoes the most recent action, which must be a clue: hands the turn back and
// returns the spent clue.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_clue() {
  // Only the type of the log entry is needed here
  [[maybe_unused]] const ActionType undone_type = _actions_log.top().action_type;
  _actions_log.pop();
  ASSERT(undone_type == ActionType::clue);

  decr_turn();

  ASSERT(_num_clues < max_num_clues);
  _num_clues++;
}
// Undoes the most recent logged action by dispatching on its type.
// Log entries of any other type are left untouched.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert() {
  const ActionType last_type = _actions_log.top().action_type;
  if (last_type == ActionType::clue) {
    revert_clue();
  } else if (last_type == ActionType::discard) {
    revert_discard();
  } else if (last_type == ActionType::play) {
    revert_play();
  }
}
2023-11-10 23:53:28 +01:00
// Adds `change` to the clue count and limits the result to the range [0, 8].
// NOTE(review): the lower bound only takes effect if clue_t is a signed type -- confirm.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::modify_clues(Hanabi::clue_t change)
{
  _num_clues += change;
  if (_num_clues > 8) {
    _num_clues = 8;
  } else if (_num_clues < 0) {
    _num_clues = 0;
  }
}
// Overwrites the clue count; the new value has to lie within [0, 8].
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::set_clues(Hanabi::clue_t clues) {
  assert(0 <= clues and clues <= 8);
  _num_clues = clues;
}
// Index of the player whose turn it currently is.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
player_t HanabiState<num_suits, num_players, hand_size>::turn() const
{
  return _turn;
}
// Current clue count.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
clue_t HanabiState<num_suits, num_players, hand_size>::num_clues() const
{
  return _num_clues;
}
// Returns a copy of all hands, one vector per player in player order.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<std::vector<Card>> HanabiState<num_suits, num_players, hand_size>::hands() const {
  std::vector<std::vector<Card>> all_hands;
  for (player_t player = 0; player < num_players; ++player) {
    const auto &hand = _hands[player];
    all_hands.emplace_back(hand.begin(), hand.end());
  }
  return all_hands;
}
// Returns a copy of the hand of the player whose turn it is.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<Card> HanabiState<num_suits, num_players, hand_size>::cur_hand() const {
  const auto &hand = _hands[_turn];
  return std::vector<Card>(hand.begin(), hand.end());
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
2023-08-12 00:04:02 +02:00
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> HanabiState<num_suits, num_players, hand_size>::possible_next_states(hand_index_t index, bool play) {
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> next_states;
do_for_each_potential_draw(index, play, [this, &next_states, &index](unsigned multiplicity){
auto prob = lookup();
// bit hacky to get drawn card here
decr_turn();
2023-08-12 00:04:02 +02:00
const CardMultiplicity drawn_card = {_hands[_turn][index], multiplicity};
incr_turn();
2023-08-12 00:04:02 +02:00
next_states.emplace_back(drawn_card, prob);
});
return next_states;
}
2023-08-11 15:41:03 +02:00
// Lists the actions considered in the current position together with their winning
// probability according to the tablebase (empty optional if at least one resulting position
// is not in the tablebase):
//  - playing any playable card,
//  - discarding the first trash card in hand (only with positive pace and fewer than the
//    maximum number of clues),
//  - giving a clue (only if a clue is available).
// Returns an empty list for decided positions. The state is unchanged on return.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::vector<std::pair<Action, std::optional<probability_t>>> HanabiState<num_suits, num_players, hand_size>::get_reasonable_actions() {
  std::vector<std::pair<Action, std::optional<probability_t>>> reasonable_actions {};

  if(_score == _score_goal or _pace < 0 or _endgame_turns_left == 0) {
    return reasonable_actions;
  }

  const std::array<Card, hand_size>& hand = _hands[_turn];

  // Weighted average of the tablebase values over all possible draws after playing
  // (play == true) or discarding (play == false) the card in the given slot
  auto rate_draw_action = [this](std::uint8_t slot, bool play) -> std::optional<probability_t> {
    bool known = true;
    probability_t sum_of_probabilities = 0;
    do_for_each_potential_draw(slot, play, [this, &sum_of_probabilities, &known](const unsigned long multiplicity){
      const std::optional<probability_t> prob = lookup();
      if (prob.has_value()) {
        sum_of_probabilities += prob.value() * multiplicity;
      } else {
        known = false;
      }
    });
    if (not known) {
      return std::nullopt;
    }
    // With an empty draw pile there is exactly one outcome of weight 1
    const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
    const probability_t probability = sum_of_probabilities / total_weight;
    return probability;
  };

  // First, check for playable cards
  for(std::uint8_t index = 0; index < hand_size; index++) {
    if(not is_playable(hand[index])) {
      continue;
    }
    const Action action = {ActionType::play, hand[index]};
    reasonable_actions.emplace_back(action, rate_draw_action(index, true));
  }

  // Then, a single discard
  if(_pace > 0 and _num_clues < max_num_clues) {
    for(std::uint8_t index = 0; index < hand_size; index++) {
      if (not is_trash(hand[index])) {
        continue;
      }
      const Action action = {ActionType::discard, hand[index]};
      reasonable_actions.emplace_back(action, rate_draw_action(index, false));
      // All discards are equivalent, do not continue searching for different trash
      break;
    }
  }

  // Finally, giving a clue
  if(_num_clues > 0) {
    give_clue();
    const std::optional<probability_t> prob = lookup();
    const Action action = {ActionType::clue, Cards::unknown};
    reasonable_actions.emplace_back(action, prob);
    revert_clue();
  }

  return reasonable_actions;
}
// Returns the winning probability of the current position without searching:
//  - 1 if the score goal is reached,
//  - 0 if pace is negative or no turns are left,
//  - otherwise the stored tablebase value, or an empty optional if the position has not
//    been evaluated.
// The win condition is the score goal (as in evaluate_state() and
// get_reasonable_actions()), since goal-reaching positions are never stored in the tablebase.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::optional<probability_t> HanabiState<num_suits, num_players, hand_size>::lookup() const {
  if (_score == _score_goal) {
    return 1;
  }
  if (_pace < 0 or _endgame_turns_left == 0) {
    return 0;
  }
  const auto id = unique_id();
  if(_position_tablebase.contains(id)) {
    return _position_tablebase.at(id);
  } else {
    return std::nullopt;
  }
}
2023-08-06 11:54:57 +02:00
// Makes `card` the next card to be drawn by swapping its draw pile entry with the front
// entry. If `card` is trash, any trash entry of the pile qualifies.
// The pile has to contain a matching entry.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::rotate_next_draw(const Card& card) {
  const bool wanted_is_trash = is_trash(card);
  const auto matches_wanted = [&card, wanted_is_trash, this](const CardMultiplicity& entry){
    if (wanted_is_trash and is_trash(entry.card)) {
      return true;
    }
    return entry.card.rank == card.rank and entry.card.suit == card.suit;
  };

  auto wanted_it = std::find_if(_draw_pile.begin(), _draw_pile.end(), matches_wanted);
  ASSERT(wanted_it != _draw_pile.end());
  std::swap(*wanted_it, _draw_pile.front());
}
// Type of the most recently logged action. The action log must not be empty.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
ActionType HanabiState<num_suits, num_players, hand_size>::last_action_type() const
{
  assert(not _actions_log.empty());
  const auto &most_recent = _actions_log.top();
  return most_recent.action_type;
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
2023-08-10 18:27:25 +02:00
probability_t HanabiState<num_suits, num_players, hand_size>::evaluate_state() {
ASSERT(_relative_representation.initialized);
2023-08-06 15:05:34 +02:00
_enumerated_states++;
const unsigned long id_of_state = unique_id();
if (_score == _score_goal) {
2023-08-06 11:54:57 +02:00
return 1;
}
2023-08-06 15:02:50 +02:00
if(_pace < 0 || _endgame_turns_left == 0) {
2023-08-06 11:54:57 +02:00
return 0;
}
if (_position_tablebase.contains(id_of_state)) {
return _position_tablebase[id_of_state];
}
2023-08-06 11:54:57 +02:00
// TODO: Have some endgame analysis here?
2023-08-10 11:14:15 +02:00
probability_t best_probability = 0;
const std::array<Card, hand_size>& hand = _hands[_turn];
2023-08-06 11:54:57 +02:00
// First, check for playables
for(std::uint8_t index = 0; index < hand_size; index++) {
if(is_playable(hand[index])) {
probability_t sum_of_probabilities = 0;
do_for_each_potential_draw(index, true, [this, &sum_of_probabilities](const unsigned long multiplicity){
2023-08-10 18:27:25 +02:00
sum_of_probabilities += evaluate_state() * multiplicity;
});
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
const probability_t probability_play = sum_of_probabilities / total_weight;
best_probability = std::max(best_probability, probability_play);
if (best_probability == 1) {
update_tablebase(id_of_state, best_probability);
return best_probability;
};
2023-08-06 11:54:57 +02:00
}
}
// Check for discards now
if(_pace > 0 and _num_clues < max_num_clues) {
2023-08-06 11:54:57 +02:00
for(std::uint8_t index = 0; index < hand_size; index++) {
if (is_trash(hand[index])) {
2023-08-10 11:14:15 +02:00
probability_t sum_of_probabilities = 0;
do_for_each_potential_draw(index, false, [this, &sum_of_probabilities](const unsigned long multiplicity){
2023-08-10 18:27:25 +02:00
sum_of_probabilities += evaluate_state() * multiplicity;
});
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
const probability_t probability_discard = sum_of_probabilities / total_weight;
best_probability = std::max(best_probability, probability_discard);
best_probability = std::max(best_probability, probability_discard);
if (best_probability == 1) {
update_tablebase(id_of_state, best_probability);
return best_probability;
};
2023-08-06 11:54:57 +02:00
// All discards are equivalent, do not continue searching for different trash
break;
}
}
}
// Last option is to stall
if(_num_clues > 0) {
give_clue();
2023-08-10 18:27:25 +02:00
const probability_t probability_stall = evaluate_state();
revert_clue();
best_probability = std::max(best_probability, probability_stall);
if (best_probability == 1) {
update_tablebase(id_of_state, best_probability);
return best_probability;
};
2023-08-06 11:54:57 +02:00
}
update_tablebase(id_of_state, best_probability);
return best_probability;
}
// Performs a play (play == true) or discard (play == false) of the card at `index` once for
// every distinct card that could be drawn as replacement, calls f(multiplicity) in the
// resulting position, and reverts the action again.
//
// f: callable taking the multiplicity of the drawn card; called exactly once with 1 if the
//    draw pile is empty
// The draw pile is cycled through entry by entry, so on return the state (including the
// order of the draw pile) is the same as before the call.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
template<class Function>
void HanabiState<num_suits, num_players, hand_size>::do_for_each_potential_draw(hand_index_t index, bool play, Function f) {
  // Only needed for the final consistency check
  [[maybe_unused]] const auto pile_before = _draw_pile;

  auto do_action = [this, index, play](){
    if (play) {
      return play_and_potentially_update(index, true);
    } else {
      return discard_and_potentially_update(index, true);
    }
  };
  auto revert_action = [this, play](){
    if (play) {
      revert_play(true);
    } else {
      revert_discard(true);
    }
  };

  if(_draw_pile.empty()) {
    // No replacement card: the action has a single outcome
    do_action();
    f(1);
    revert_action();
  } else {
    unsigned long sum_of_multiplicities = 0;
    // Each do/revert pair moves the front entry to the back of the pile, so after
    // _draw_pile.size() rounds every entry has been drawn exactly once
    for(size_t i = 0; i < _draw_pile.size(); i++) {
      const unsigned long multiplicity = do_action();
      sum_of_multiplicities += multiplicity;
      f(multiplicity);
      revert_action();
    }
    ASSERT(sum_of_multiplicities == _weighted_draw_pile_size);
  }
  ASSERT(_draw_pile == pile_before);
}
// Encodes the current position into a single integer used as tablebase key. The parts are
// packed in a mixed-radix fashion, in this order: positions of all copies of the useful
// cards that started in the draw pile, number of clues, draw pile size / remaining endgame
// turns, positions of the useful starting hand cards (one bit each), and the player to move.
// The accumulator has the width of the return type, independent of the platform's
// width of unsigned long.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::unique_id() const {
  std::uint64_t id = 0;

  // encode all positions of cards that started in draw pile
  ASSERT(_relative_representation.card_positions_draw.size() == _relative_representation.good_cards_draw.size());
  for(size_t i = 0; i < _relative_representation.card_positions_draw.size(); i++) {
    for(player_t player : _relative_representation.card_positions_draw[i]) {
      id *= num_players + 2;
      // We normalize here: If a card is already played, then the positions of its other copies
      // do not matter, so we can just pretend that they are all in the trash already.
      // The resulting states will be equivalent.
      if (!is_trash(_relative_representation.good_cards_draw[i])) {
        id += player;
      } else {
        id += trash_or_play_stack;
      }
    }
  }

  // encode number of clues
  id *= max_num_clues + 1;
  id += _num_clues;

  // we can encode draw pile size and extra turn in one metric, since we only have extra turns if draw pile is empty
  const std::uint8_t draw_pile_size_and_extra_turns = [this]() -> uint8_t {
    if(_endgame_turns_left == no_endgame) {
      return _weighted_draw_pile_size + num_players;
    }
    else {
      return _endgame_turns_left;
    }
  }();
  id *= _relative_representation.initial_draw_pile_size + num_players;
  id += draw_pile_size_and_extra_turns;

  // encode positions of cards that started in hands
  id = id << _relative_representation.num_useful_cards_in_starting_hands;
  id += _relative_representation.card_positions_hands.to_ulong();

  // encode whose turn it is
  id *= num_players;
  id += _turn;

  // The id is unique now, since for all relevant cards, we know their position (including if they are played),
  // the number of clues, the draw pile size and whose turn it is.
  // This already uniquely determines the current players position, assuming that we never discard good cards
  // (and only play them)
  return id;
}
2023-08-12 18:48:01 +02:00
// Returns the individual components that unique_id() folds into a single number, without
// combining them.
//
// first:  one entry per card that started in the draw pile (its position, or
//         trash_or_play_stack if the card is already trash), followed by the number of clues,
//         the combined draw pile size / remaining endgame turns, the positions of the cards
//         that started in hands, and finally the current turn.
// second: for every position entry at the front of `first`, the card it belongs to.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::pair<std::vector<std::uint64_t>, std::vector<Card>> HanabiState<num_suits, num_players, hand_size>::dump_unique_id_parts() const {
  std::vector<std::uint64_t> parts;
  std::vector<Card> cards;

  // encode all positions of cards that started in draw pile
  ASSERT(_relative_representation.card_positions_draw.size() == _relative_representation.good_cards_draw.size());
  for (size_t i = 0; i < _relative_representation.card_positions_draw.size(); i++) {
    for (player_t player : _relative_representation.card_positions_draw[i]) {
      // We normalize here: If a card is already played, then the positions of its other copies
      // do not matter, so we can just pretend that they are all in the trash already.
      // The resulting states will be equivalent.
      if (!is_trash(_relative_representation.good_cards_draw[i])) {
        parts.push_back(player);
      } else {
        parts.push_back(trash_or_play_stack);
      }
      cards.push_back(_relative_representation.good_cards_draw[i]);
    }
  }

  // encode number of clues
  parts.push_back(_num_clues);

  // We can encode draw pile size and extra turns in one metric, since we only have extra turns
  // if the draw pile is empty.
  const std::uint8_t draw_pile_size_and_extra_turns = [this]() -> std::uint8_t {
    if (_endgame_turns_left == no_endgame) {
      return _weighted_draw_pile_size + num_players;
    }
    else {
      return _endgame_turns_left;
    }
  }();
  parts.push_back(draw_pile_size_and_extra_turns);

  // encode positions of cards that started in hands
  parts.push_back(_relative_representation.card_positions_hands.to_ulong());

  // encode whose turn it is
  parts.push_back(_turn);

  // The parts are unique now, since for all relevant cards, we know their position (including if
  // they are played), the number of clues, the draw pile size and whose turn it is.
  // This already uniquely determines the current players position, assuming that we never discard
  // good cards (and only play them)

  // Hand both vectors over to the pair instead of copying them.
  return {std::move(parts), std::move(cards)};
}
// Read-only access to the table of stored probabilities, keyed by state id.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
const std::unordered_map<unsigned long, probability_t>&
HanabiState<num_suits, num_players, hand_size>::position_tablebase() const {
  const auto& tablebase = _position_tablebase;
  return tablebase;
}
2023-08-08 00:31:16 +02:00
// Reports the size of the draw pile. Note that this is the weighted size
// (_weighted_draw_pile_size), not the number of entries stored in _draw_pile.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
size_t
HanabiState<num_suits, num_players, hand_size>::draw_pile_size() const {
  const size_t weighted_size = _weighted_draw_pile_size;
  return weighted_size;
}
// Tells whether the relative representation carries its `initialized` flag.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
bool
HanabiState<num_suits, num_players, hand_size>::is_relative_state_initialized() const {
  const bool initialized = _relative_representation.initialized;
  return initialized;
}
// Stores `probability` for the state `id` in the position tablebase.
//
// id:          key of the state.
// probability: value to store for that state.
//
// If the state is already present, the stored value is asserted to be equal to
// `probability` and is then overwritten with it.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::update_tablebase(
    unsigned long id,
    Hanabi::probability_t probability) {
  // A single lookup: either inserts the new entry or hands back the existing one.
  auto [entry, inserted] = _position_tablebase.try_emplace(id, probability);
  if (!inserted) {
    ASSERT(entry->second == probability);
    // Still overwrite, so the stored value is `probability` even when ASSERT is compiled out.
    entry->second = probability;
  }
}
2023-08-05 11:55:46 +02:00
} // namespace Hanabi