431 lines
No EOL
16 KiB
C++
431 lines
No EOL
16 KiB
C++
#include <algorithm>
|
|
#include <iterator>
|
|
#include "myassert.h"
|
|
#include "game_state.h"
|
|
#include <vector>
|
|
|
|
namespace Hanabi {
|
|
|
|
// Stream insertion for any game state: forwards to the state's own print()
// member and hands the stream back so insertions can be chained.
std::ostream& operator<<(std::ostream& os, HanabiStateIF const& hanabi_state) {
  hanabi_state.print(os);
  return os;
}
|
|
|
|
// Pre-increment: bumps this card's rank by one in place and returns the
// modified card.
Card &Card::operator++() {
  ++rank;
  return *this;
}
|
|
|
|
// Returns a new card of the same suit whose rank is one higher than this
// card's rank; this card is left untouched.
Card Card::successor() const {
  const auto next_rank = static_cast<rank_t>(rank + 1);
  return {suit, next_rank};
}
|
|
|
|
// Post-increment: bumps this card's rank by one and returns a copy of the
// card as it was before the increment.
const Card Card::operator++(int) {
  const Card previous = *this;
  ++rank;
  return previous;
}
|
|
|
|
template<std::size_t num_suits>
|
|
std::ostream &operator<<(std::ostream &os, const Stacks<num_suits> &stacks) {
|
|
for (size_t i = 0; i < stacks.size() - 1; i++) {
|
|
os << starting_card_rank - stacks[i] << ", ";
|
|
}
|
|
os << starting_card_rank - stacks.back();
|
|
return os;
|
|
}
|
|
|
|
template<std::size_t num_suits, typename T, bool respect_card_duplicity>
|
|
CardArray<num_suits, T, respect_card_duplicity>::CardArray(T default_val) {
|
|
for(size_t suit = 0; suit < num_suits; suit++) {
|
|
for (rank_t rank = 0; rank < starting_card_rank; rank++) {
|
|
if constexpr (respect_card_duplicity) {
|
|
std::ranges::fill(_vals.array[suit][rank], default_val);
|
|
} else {
|
|
_vals.array[suit][rank] = default_val;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
template<std::size_t num_suits, typename T, bool respect_card_duplicity>
|
|
const T& CardArray<num_suits, T, respect_card_duplicity>::operator[](const Card &card) const {
|
|
if constexpr (respect_card_duplicity) {
|
|
return _vals.array[card.suit][card.rank][card.copy];
|
|
} else {
|
|
return _vals.array[card.suit][card.rank];
|
|
}
|
|
};
|
|
|
|
template<std::size_t num_suits, typename T, bool respect_card_duplicity>
|
|
T& CardArray<num_suits, T, respect_card_duplicity>::operator[](const Card &card) {
|
|
if constexpr (respect_card_duplicity) {
|
|
return _vals.array[card.suit][card.rank][card.copy];
|
|
} else {
|
|
return _vals.array[card.suit][card.rank];
|
|
}
|
|
};
|
|
|
|
template<size_t num_suits, player_t num_players, size_t hand_size>
|
|
HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> &deck):
|
|
_turn(0),
|
|
_num_clues(max_num_clues),
|
|
_weighted_draw_pile_size(deck.size()),
|
|
_stacks(),
|
|
_hands(),
|
|
// _card_positions(draw_pile),
|
|
_draw_pile(),
|
|
_endgame_turns_left(no_endgame),
|
|
_pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)),
|
|
_score(0) {
|
|
std::ranges::fill(_stacks, starting_card_rank);
|
|
for(const Card& card: deck) {
|
|
_draw_pile.push_back({card, 1});
|
|
}
|
|
for(player_t player = 0; player < num_players; player++) {
|
|
for(std::uint8_t index = 0; index < hand_size; index++) {
|
|
draw(index);
|
|
}
|
|
incr_turn();
|
|
}
|
|
ASSERT(_turn == 0);
|
|
}
|
|
|
|
template<size_t num_suits, player_t num_players, size_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::clue() {
|
|
ASSERT(_num_clues > 0);
|
|
--_num_clues;
|
|
|
|
incr_turn();
|
|
}
|
|
|
|
template<size_t num_suits, player_t num_players, size_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::incr_turn() {
|
|
_turn = (_turn + 1) % num_players;
|
|
if(_endgame_turns_left != no_endgame) {
|
|
_endgame_turns_left--;
|
|
}
|
|
}
|
|
|
|
template<size_t num_suits, player_t num_players, size_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::decr_turn() {
|
|
_turn = (_turn + num_players - 1) % num_players;
|
|
if (_endgame_turns_left != no_endgame) {
|
|
_endgame_turns_left++;
|
|
}
|
|
}
|
|
|
|
template<size_t num_suits, player_t num_players, size_t hand_size>
|
|
bool HanabiState<num_suits, num_players, hand_size>::is_playable(const Hanabi::Card &card) const {
|
|
return card.rank == _stacks[card.suit] - 1;
|
|
}
|
|
|
|
template<size_t num_suits, player_t num_players, size_t hand_size>
|
|
std::uint64_t HanabiState<num_suits, num_players, hand_size>::enumerated_states() const {
|
|
return _enumerated_states;
|
|
}
|
|
|
|
template<size_t num_suits, player_t num_players, size_t hand_size>
|
|
bool HanabiState<num_suits, num_players, hand_size>::is_trash(const Hanabi::Card &card) const {
|
|
return card.rank >= _stacks[card.suit];
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
BacktrackAction HanabiState<num_suits, num_players, hand_size>::play(
|
|
std::uint8_t index) {
|
|
ASSERT(index < _hands[_turn].size());
|
|
const Card card = _hands[_turn][index];
|
|
ASSERT(card.rank == _stacks[card.suit] - 1);
|
|
|
|
--_stacks[card.suit];
|
|
_score++;
|
|
|
|
BacktrackAction ret{_hands[_turn][index], index, 0};
|
|
|
|
if (card.rank == 0 and _num_clues < max_num_clues) {
|
|
// update clues if we played the last card of a stack
|
|
_num_clues++;
|
|
}
|
|
|
|
ret.multiplicity = draw(index);
|
|
incr_turn();
|
|
|
|
return ret;
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
BacktrackAction HanabiState<num_suits, num_players, hand_size>::discard(std::uint8_t index) {
|
|
ASSERT(index < _hands[_turn].size());
|
|
ASSERT(_num_clues != max_num_clues);
|
|
|
|
_num_clues++;
|
|
_pace--;
|
|
|
|
BacktrackAction ret{_hands[_turn][index], index};
|
|
|
|
ret.multiplicity = draw(index);
|
|
incr_turn();
|
|
|
|
return ret;
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
std::uint8_t HanabiState<num_suits, num_players, hand_size>::find_card_in_hand(
|
|
const Hanabi::Card &card) const {
|
|
for(std::uint8_t i = 0; i < hand_size; i++) {
|
|
if(_hands[_turn][i].rank == card.rank && _hands[_turn][i].suit == card.suit) {
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::print(std::ostream &os) const {
|
|
os << "Stacks: " << _stacks << " (score " << +_score << ")";
|
|
os << ", clues: " << +_num_clues << ", turn: " << +_turn << std::endl;
|
|
os << "Draw pile: ";
|
|
for (const auto &[card, mul]: _draw_pile) {
|
|
os << card;
|
|
if (mul > 1) {
|
|
os << " (" << +mul << ")";
|
|
}
|
|
os << ", ";
|
|
}
|
|
os << "(size " << +_weighted_draw_pile_size << ")" << std::endl;
|
|
os << "Hands: ";
|
|
for (const auto &hand: _hands) {
|
|
for (const auto &card: hand) {
|
|
os << card << ", ";
|
|
}
|
|
os << " | ";
|
|
}
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
std::uint8_t HanabiState<num_suits, num_players, hand_size>::draw(uint8_t index) {
|
|
ASSERT(index < _hands[_turn].size());
|
|
|
|
const Card& discarded = _hands[_turn][index];
|
|
if (_stacks[discarded.suit] > discarded.rank) {
|
|
// _card_positions[_hands[_turn][index]] = trash_or_play_stack;
|
|
}
|
|
|
|
// draw a new card if the draw pile is not empty
|
|
if (!_draw_pile.empty()) {
|
|
--_weighted_draw_pile_size;
|
|
|
|
const CardMultiplicity draw = _draw_pile.front();
|
|
_draw_pile.pop_front();
|
|
ASSERT(draw.multiplicity > 0);
|
|
|
|
if (draw.multiplicity > 1) {
|
|
_draw_pile.push_back(draw);
|
|
_draw_pile.back().multiplicity--;
|
|
}
|
|
|
|
Card& card_in_hand = _hands[_turn][index];
|
|
card_in_hand = draw.card;
|
|
card_in_hand.copy = draw.multiplicity - 1;
|
|
|
|
if (_stacks[draw.card.suit] > draw.card.rank) {
|
|
// _card_positions[card_in_hand] = _turn;
|
|
}
|
|
|
|
if(_draw_pile.empty()) {
|
|
// Note the +1, since we will immediately decrement this when moving to the next player
|
|
_endgame_turns_left = num_players + 1;
|
|
}
|
|
return draw.multiplicity;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::revert_draw(std::uint8_t index, Card discarded_card) {
|
|
if (_endgame_turns_left == num_players + 1 || _endgame_turns_left == no_endgame) {
|
|
// Put the card that is currently in hand back into the draw pile
|
|
ASSERT(index < _hands[_turn].size());
|
|
const Card &drawn = _hands[_turn][index];
|
|
if (_stacks[drawn.suit] > drawn.rank) {
|
|
// _card_positions[drawn] = draw_pile;
|
|
}
|
|
|
|
// put discarded_card back into draw pile (at the back)
|
|
if (!_draw_pile.empty() and _draw_pile.back().card.suit == drawn.suit and
|
|
_draw_pile.back().card.rank == drawn.rank) {
|
|
_draw_pile.back().multiplicity++;
|
|
} else {
|
|
_draw_pile.push_back({drawn, 1});
|
|
}
|
|
_weighted_draw_pile_size++;
|
|
_endgame_turns_left = no_endgame;
|
|
}
|
|
_hands[_turn][index] = discarded_card;
|
|
if (_stacks[discarded_card.suit] > discarded_card.rank) {
|
|
// _card_positions[discarded_card] = _turn;
|
|
}
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::normalize_draw_and_positions() {
|
|
const Card trash = [this]() -> Card {
|
|
for(suit_t suit = 0; suit < num_suits; suit++) {
|
|
if(_stacks[suit] < starting_card_rank) {
|
|
return {suit, starting_card_rank - 1, 0};
|
|
}
|
|
}
|
|
return {0,0,0};
|
|
}();
|
|
|
|
CardArray<num_suits, std::uint8_t, false> nums_in_draw_pile;
|
|
std::uint8_t num_trash_in_draw_pile = 0;
|
|
for(const auto [card, multiplicity] : _draw_pile) {
|
|
if (_stacks[card.suit] > card.rank) {
|
|
nums_in_draw_pile[card] += multiplicity;
|
|
} else {
|
|
num_trash_in_draw_pile++;
|
|
}
|
|
}
|
|
|
|
_draw_pile.clear();
|
|
for(suit_t suit = 0; suit < num_suits; suit++) {
|
|
for(rank_t rank = 0; rank < starting_card_rank; rank++) {
|
|
Card card {suit, rank, 0};
|
|
if (nums_in_draw_pile[card] > 0) {
|
|
_draw_pile.push_back({card, nums_in_draw_pile[card]});
|
|
for (std::uint8_t copy = 0; copy < nums_in_draw_pile[card]; copy++) {
|
|
card.copy = copy;
|
|
// _card_positions[card] = draw_pile;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
_draw_pile.push_back({trash, num_trash_in_draw_pile});
|
|
|
|
for(player_t player = 0; player < num_players; player++) {
|
|
for(Card& card : _hands[player]) {
|
|
if (_stacks[card.suit] > card.rank) {
|
|
card.copy = nums_in_draw_pile[card];
|
|
nums_in_draw_pile[card]++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::revert_play(const BacktrackAction& action, bool was_on_8_clues) {
|
|
ASSERT(!was_on_8_clues or _num_clues == 8);
|
|
decr_turn();
|
|
if (action.discarded.rank == 0 and not was_on_8_clues) {
|
|
_num_clues--;
|
|
}
|
|
revert_draw(action.index, action.discarded);
|
|
_stacks[action.discarded.suit]++;
|
|
_score--;
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::revert_discard(const BacktrackAction& action) {
|
|
decr_turn();
|
|
ASSERT(_num_clues > 0);
|
|
_num_clues--;
|
|
_pace++;
|
|
revert_draw(action.index, action.discarded);
|
|
}
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
void HanabiState<num_suits, num_players, hand_size>::revert_clue() {
|
|
decr_turn();
|
|
ASSERT(_num_clues < max_num_clues);
|
|
_num_clues++;
|
|
}
|
|
|
|
// Folds `new_probability` into a local variable named `best_probability` of
// the enclosing function (keeping the maximum) and makes the enclosing
// function return immediately once that maximum equals 1.
//
// Wrapped in do { ... } while (false) so that the macro expands to exactly one
// statement and is safe inside unbraced if/else; the argument is parenthesised
// so that arbitrary expressions can be passed. Use it with a trailing
// semicolon: UPDATE_PROBABILITY(p);
#define UPDATE_PROBABILITY(new_probability)                             \
  do {                                                                  \
    best_probability = std::max(best_probability, (new_probability));   \
    if (best_probability == 1) {                                        \
      return best_probability;                                          \
    }                                                                   \
  } while (false)
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
double HanabiState<num_suits, num_players, hand_size>::backtrack(size_t depth) {
|
|
_enumerated_states++;
|
|
if (_score == 5 * num_suits) {
|
|
return 1;
|
|
}
|
|
if(_pace < 0 || _endgame_turns_left == 0) {
|
|
return 0;
|
|
}
|
|
|
|
// TODO: Have some endgame analysis here?
|
|
|
|
// First, check if we have any playable cards
|
|
double best_probability = 0;
|
|
const std::array<Card, hand_size> hand = _hands[_turn];
|
|
|
|
// First, check for playables
|
|
for(std::uint8_t index = 0; index < hand_size; index++) {
|
|
if(is_playable(hand[index])) {
|
|
if (_draw_pile.empty()) {
|
|
bool on_8_clues = _num_clues == 8;
|
|
BacktrackAction action = play(index);
|
|
const double probability_for_this_play = backtrack(depth + 1);
|
|
revert_play(action, on_8_clues);
|
|
UPDATE_PROBABILITY(probability_for_this_play);
|
|
} else {
|
|
double sum_of_probabilities = 0;
|
|
uint8_t sum_of_mults = 0;
|
|
for (size_t i = 0; i < _draw_pile.size(); i++) {
|
|
bool on_8_clues = _num_clues == 8;
|
|
BacktrackAction action = play(index);
|
|
sum_of_probabilities += backtrack(depth + 1) * action.multiplicity;
|
|
sum_of_mults += action.multiplicity;
|
|
revert_play(action, on_8_clues);
|
|
ASSERT(sum_of_mults <= _weighted_draw_pile_size);
|
|
}
|
|
ASSERT(sum_of_mults == _weighted_draw_pile_size);
|
|
const double probability_for_this_play = sum_of_probabilities / _weighted_draw_pile_size;
|
|
UPDATE_PROBABILITY(probability_for_this_play);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check for discards now
|
|
if(_pace > 0 and _num_clues < max_num_clues) {
|
|
for(std::uint8_t index = 0; index < hand_size; index++) {
|
|
if (is_trash(hand[index])) {
|
|
double sum_of_probabilities = 0;
|
|
if (_draw_pile.empty()) {
|
|
BacktrackAction action = discard(index);
|
|
const double probability_for_this_discard = backtrack(depth + 1);
|
|
revert_discard(action);
|
|
UPDATE_PROBABILITY(probability_for_this_discard);
|
|
} else {
|
|
uint8_t sum_of_mults = 0;
|
|
for (size_t i = 0; i < _draw_pile.size(); i++) {
|
|
BacktrackAction action = discard(index);
|
|
sum_of_probabilities += backtrack(depth + 1) * action.multiplicity;
|
|
sum_of_mults += action.multiplicity;
|
|
revert_discard(action);
|
|
}
|
|
ASSERT(sum_of_mults == _weighted_draw_pile_size);
|
|
const double probability_discard = sum_of_probabilities / _weighted_draw_pile_size;
|
|
UPDATE_PROBABILITY(probability_discard);
|
|
}
|
|
|
|
// All discards are equivalent, do not continue searching for different trash
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Last option is to stall
|
|
if(_num_clues > 0) {
|
|
clue();
|
|
const double probability_stall = backtrack(depth + 1);
|
|
revert_clue();
|
|
UPDATE_PROBABILITY(probability_stall);
|
|
}
|
|
|
|
return best_probability;
|
|
}
|
|
|
|
} // namespace Hanabi
|