Endgame-Analyzer/game_state.hpp

397 lines
14 KiB
C++
Raw Normal View History

2023-08-04 16:28:41 +02:00
#include <cassert>
#include <algorithm>
#include <iterator>
2023-08-05 11:55:46 +02:00
namespace Hanabi {
2023-08-04 16:28:41 +02:00
2023-08-05 11:55:46 +02:00
// Prefix increment: raises this card's rank by one and returns the card itself.
Card &Card::operator++() {
  ++rank;
  return *this;
}
2023-08-05 11:55:46 +02:00
// Returns a card of the same suit whose rank is this card's rank plus one.
// The card itself is left untouched.
Card Card::successor() const {
  const auto next_rank = static_cast<rank_t>(rank + 1);
  return {suit, next_rank};
}
// Postfix increment: raises the rank by one and returns a copy of the card
// as it was before the increment.
const Card Card::operator++(int) {
  Card previous = *this;
  ++rank;
  return previous;
}
// Streams the stacks as a comma-separated list. Each entry is printed as
// `starting_card_rank - stack value`.
//
// The separator is emitted before every entry but the first, so an empty
// container prints nothing instead of underflowing `size() - 1` and calling
// `back()` on an empty range.
template<std::size_t num_suits>
std::ostream &operator<<(std::ostream &os, const Stacks<num_suits> &stacks) {
  const char *separator = "";
  for (const auto &top : stacks) {
    os << separator << starting_card_rank - top;
    separator = ", ";
  }
  return os;
}
// Fills every slot of the array with `default_val`.
//
// When `respect_card_duplicity` is set, each (suit, rank) entry is itself a
// range (one element per copy) and is filled as a whole; otherwise the entry
// is a single value.
template<std::size_t num_suits, typename T, bool respect_card_duplicity>
CardArray<num_suits, T, respect_card_duplicity>::CardArray(T default_val) {
  for (std::size_t s = 0; s < num_suits; ++s) {
    auto &suit_row = _vals.array[s];
    for (rank_t r = 0; r < starting_card_rank; ++r) {
      auto &slot = suit_row[r];
      if constexpr (respect_card_duplicity) {
        std::ranges::fill(slot, default_val);
      } else {
        slot = default_val;
      }
    }
  }
}
// Read-only access to the value stored for `card`.
// The lookup is by (suit, rank), plus the card's `copy` index when
// `respect_card_duplicity` is set.
template<std::size_t num_suits, typename T, bool respect_card_duplicity>
const T& CardArray<num_suits, T, respect_card_duplicity>::operator[](const Card &card) const {
  const auto &entry = _vals.array[card.suit][card.rank];
  if constexpr (respect_card_duplicity) {
    return entry[card.copy];
  } else {
    return entry;
  }
}
// Mutable access to the value stored for `card`.
// The lookup is by (suit, rank), plus the card's `copy` index when
// `respect_card_duplicity` is set.
template<std::size_t num_suits, typename T, bool respect_card_duplicity>
T& CardArray<num_suits, T, respect_card_duplicity>::operator[](const Card &card) {
  auto &entry = _vals.array[card.suit][card.rank];
  if constexpr (respect_card_duplicity) {
    return entry[card.copy];
  } else {
    return entry;
  }
}
// Builds the initial state from `deck`.
//
// Every stack starts at `starting_card_rank`, the clue count at
// `max_num_clues`, and the draw pile receives the deck in order with one
// entry of multiplicity 1 per card. Each player then draws `hand_size` cards
// via draw(), with the turn passed on after each full hand, so the turn
// counter is back at player 0 when dealing is done.
template<size_t num_suits, player_t num_players, size_t hand_size>
HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> &deck):
    _turn(0),
    _num_clues(max_num_clues),
    _weighted_draw_pile_size(deck.size()),
    _stacks(),
    _hands(),
    _card_positions(draw_pile),
    _draw_pile(),
    _pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)),
    _score(0) {
  std::ranges::fill(_stacks, starting_card_rank);

  for (const Card &deck_card : deck) {
    _draw_pile.push_back({deck_card, 1});
  }

  // Deal the starting hands; draw() always fills the hand of the player whose turn it is.
  for (player_t dealt_to = 0; dealt_to < num_players; ++dealt_to) {
    for (std::uint8_t slot = 0; slot < hand_size; ++slot) {
      draw(slot);
    }
    incr_turn();
  }
  assert(_turn == 0);
}
// Spends one clue and passes the turn.
// Requires at least one clue to be available (asserted).
// Returns the action record needed by revert().
template<size_t num_suits, player_t num_players, size_t hand_size>
BacktrackAction HanabiState<num_suits, num_players, hand_size>::clue() {
  assert(_num_clues > 0);
  _num_clues--;
  incr_turn();

  return BacktrackAction{ActionType::clue, {}, {}};
}
// Passes the turn to the next player (wrapping around after the last one).
// While `endgame_turns_left` is not the sentinel -1, it is decremented as well.
template<size_t num_suits, player_t num_players, size_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::incr_turn() {
  _turn = (_turn + 1) % num_players;

  if (endgame_turns_left == -1) {
    return;
  }
  endgame_turns_left--;
}
// Inverse of incr_turn(): hands the turn back to the previous player (wrapping
// around before the first one). While `endgame_turns_left` is not the
// sentinel -1, it is incremented again.
template<size_t num_suits, player_t num_players, size_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::decr_turn() {
  _turn = (_turn + num_players - 1) % num_players;

  if (endgame_turns_left == -1) {
    return;
  }
  endgame_turns_left++;
}
// A card is playable when its rank is exactly one below the current value of
// its suit's stack (play() decrements the stack value with every card played).
template<size_t num_suits, player_t num_players, size_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_playable(const Hanabi::Card &card) const {
  const auto stack_value = _stacks[card.suit];
  return card.rank + 1 == stack_value;
}
// A card is trash when its rank is not below the current value of its suit's
// stack.
template<size_t num_suits, player_t num_players, size_t hand_size>
bool HanabiState<num_suits, num_players, hand_size>::is_trash(const Hanabi::Card &card) const {
  const auto stack_value = _stacks[card.suit];
  return stack_value <= card.rank;
}
// Plays the card at `index` from the hand of the player to move.
//
// The card must be playable (asserted). Its stack is advanced, the score is
// increased, and playing a rank-0 card grants one clue. The slot is then
// refilled via draw() and the turn is passed.
//
// Returns the action record for revert(); its `multiplicity` is the value
// returned by draw() (0 when nothing was drawn).
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
BacktrackAction HanabiState<num_suits, num_players, hand_size>::play(std::uint8_t index) {
  assert(index < _hands[_turn].size());
  const Card played = _hands[_turn][index];
  assert(played.rank == _stacks[played.suit] - 1);

  _stacks[played.suit]--;
  _score++;
  if (played.rank == 0) {
    // Rank 0 is the last card of a stack; finishing a stack grants a clue.
    _num_clues++;
  }

  BacktrackAction action{ActionType::play, played, index, 0};
  action.multiplicity = draw(index);

  incr_turn();
  return action;
}
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
2023-08-06 11:54:57 +02:00
BacktrackAction HanabiState<num_suits, num_players, hand_size>::discard(std::uint8_t index) {
2023-08-05 11:55:46 +02:00
assert(index < _hands[_turn].size());
assert(_num_clues != max_num_clues);
_num_clues++;
2023-08-06 11:54:57 +02:00
_pace--;
2023-08-05 11:55:46 +02:00
2023-08-05 13:04:51 +02:00
BacktrackAction ret{ActionType::discard, _hands[_turn][index], index};
2023-08-05 11:55:46 +02:00
2023-08-06 11:54:57 +02:00
ret.multiplicity = draw(index);
2023-08-05 11:55:46 +02:00
incr_turn();
return ret;
}
// Looks for a card with the same suit and rank as `card` in the hand of the
// player to move (the `copy` index is ignored).
//
// Returns the index of the first match. When there is none, -1 is returned,
// which converts to the largest value of the unsigned return type.
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
std::uint8_t HanabiState<num_suits, num_players, hand_size>::find_card_in_hand(
    const Hanabi::Card &card) const {
  const auto &hand = _hands[_turn];
  std::uint8_t position = 0;
  while (position < hand_size) {
    const Card &held = hand[position];
    if (held.suit == card.suit && held.rank == card.rank) {
      return position;
    }
    ++position;
  }
  return -1;
}
// Writes a human-readable dump of the state: stacks with score and clue count,
// the draw pile (multiplicities above 1 in parentheses) with its weighted
// size, and all hands separated by " | ".
//
// NOTE(review): the state is taken by value, so every call copies it. The
// signature is kept as is because it has to match the declaration, which is
// not visible here.
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
std::ostream &operator<<(std::ostream &os, const HanabiState<num_suits, num_players, hand_size> hanabi_state) {
  // Unary plus promotes the small integer members so they print as numbers.
  os << "Stacks: " << hanabi_state._stacks;
  os << " (score " << +hanabi_state._score << ")";
  os << ", clues: " << +hanabi_state._num_clues << std::endl;

  os << "Draw pile: ";
  for (const auto &entry : hanabi_state._draw_pile) {
    os << entry.card;
    if (entry.multiplicity > 1) {
      os << " (" << +entry.multiplicity << ")";
    }
    os << ", ";
  }
  os << "(size " << +hanabi_state._weighted_draw_pile_size << ")" << std::endl;

  os << "Hands: ";
  for (const auto &player_hand : hanabi_state._hands) {
    for (const auto &held : player_hand) {
      os << held << ", ";
    }
    os << " | ";
  }
  return os;
}
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
2023-08-06 11:54:57 +02:00
std::uint8_t HanabiState<num_suits, num_players, hand_size>::draw(uint8_t index) {
2023-08-05 11:55:46 +02:00
assert(index < _hands[_turn].size());
const Card& discarded = _hands[_turn][index];
if (_stacks[discarded.suit] > discarded.rank) {
_card_positions[_hands[_turn][index]] = trash_or_play_stack;
}
2023-08-05 11:55:46 +02:00
// draw a new card if the draw pile is not empty
if (!_draw_pile.empty()) {
2023-08-06 11:54:57 +02:00
--_weighted_draw_pile_size;
const CardMultiplicity draw = _draw_pile.front();
2023-08-05 11:55:46 +02:00
_draw_pile.pop_front();
assert(draw.multiplicity > 0);
2023-08-06 11:54:57 +02:00
2023-08-05 11:55:46 +02:00
if (draw.multiplicity > 1) {
_draw_pile.push_back(draw);
2023-08-06 11:54:57 +02:00
_draw_pile.back().multiplicity--;
2023-08-05 11:55:46 +02:00
}
2023-08-06 11:54:57 +02:00
Card& card_in_hand = _hands[_turn][index];
card_in_hand = draw.card;
card_in_hand.copy = draw.multiplicity - 1;
if (_stacks[draw.card.suit] > draw.card.rank) {
2023-08-06 11:54:57 +02:00
_card_positions[card_in_hand] = _turn;
}
if(_draw_pile.empty()) {
endgame_turns_left = num_players;
}
2023-08-06 11:54:57 +02:00
return draw.multiplicity;
2023-08-05 11:55:46 +02:00
}
2023-08-06 11:54:57 +02:00
return 0;
2023-08-05 11:55:46 +02:00
}
// Undoes a draw into slot `index` of the current player's hand.
//
// The card currently in the slot goes back to the end of the draw pile: it is
// merged into the last entry if that one has the same suit and rank, otherwise
// it is appended with multiplicity 1. The slot is then restored to `card`. The
// position bookkeeping is updated for both cards whose rank is below their
// suit's stack value, and `endgame_turns_left` is reset to the sentinel -1.
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_draw(std::uint8_t index, Card card) {
  endgame_turns_left = -1;
  assert(index < _hands[_turn].size());

  Card &slot = _hands[_turn][index];
  if (_stacks[slot.suit] > slot.rank) {
    _card_positions[slot] = draw_pile;
  }

  // Put the drawn card back at the end of the pile.
  const bool merges_with_back = !_draw_pile.empty()
                                && _draw_pile.back().card.suit == slot.suit
                                && _draw_pile.back().card.rank == slot.rank;
  if (merges_with_back) {
    _draw_pile.back().multiplicity++;
  } else {
    _draw_pile.push_back({slot, 1});
  }

  slot = card;
  if (_stacks[card.suit] > card.rank) {
    _card_positions[card] = _turn;
  }
  _weighted_draw_pile_size++;
}
// Rebuilds the draw pile in a canonical form and renumbers card copies.
//
// Afterwards the draw pile holds one entry per (suit, rank) that is still
// below its suit's stack value, in suit/rank order with the summed
// multiplicity. If the pile contained any other cards, a single extra entry
// follows that represents all of them with one stand-in card.
//
// Copies 0..k-1 of each such (suit, rank) are recorded as being in the draw
// pile. Cards of that kind held in hands receive the following copy indices
// and are recorded as being held by their player.
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::normalize_draw_and_positions() {
  // Stand-in for all trash: the first card (rank starting_card_rank - 1) of
  // any suit whose stack has already moved. The {0,0,0} fallback is only
  // reached when no stack has moved yet.
  const Card trash = [this]() -> Card {
    for(suit_t suit = 0; suit < num_suits; suit++) {
      if(_stacks[suit] < starting_card_rank) {
        return {suit, starting_card_rank - 1, 0};
      }
    }
    return {0,0,0};
  }();

  // Start all counters at zero explicitly through the fill constructor.
  CardArray<num_suits, std::uint8_t, false> nums_in_draw_pile(std::uint8_t{0});
  std::uint8_t num_trash_in_draw_pile = 0;
  for (const auto &[card, multiplicity] : _draw_pile) {
    if (_stacks[card.suit] > card.rank) {
      nums_in_draw_pile[card] += multiplicity;
    } else {
      // An entry may stand for several cards, so count its full weight.
      num_trash_in_draw_pile += multiplicity;
    }
  }

  _draw_pile.clear();
  for (suit_t suit = 0; suit < num_suits; suit++) {
    for (rank_t rank = 0; rank < starting_card_rank; rank++) {
      Card card {suit, rank, 0};
      const std::uint8_t copies = nums_in_draw_pile[card];
      if (copies == 0) {
        continue;
      }
      _draw_pile.push_back({card, copies});
      for (std::uint8_t copy = 0; copy < copies; copy++) {
        card.copy = copy;
        _card_positions[card] = draw_pile;
      }
    }
  }
  // draw() asserts that every entry has positive multiplicity, so the trash
  // entry is only added when there is trash.
  if (num_trash_in_draw_pile > 0) {
    _draw_pile.push_back({trash, num_trash_in_draw_pile});
  }

  // Cards in hands continue the copy numbering after the copies in the draw
  // pile. Their position is recorded under the new copy index, as draw() does
  // for freshly drawn cards.
  for (player_t player = 0; player < num_players; player++) {
    for (Card &card : _hands[player]) {
      if (_stacks[card.suit] > card.rank) {
        card.copy = nums_in_draw_pile[card];
        nums_in_draw_pile[card]++;
        _card_positions[card] = player;
      }
    }
  }
}
// Undoes `action`, which must be the most recent action applied to this state.
//
// The turn is handed back first, so `_turn` refers to the player who performed
// the action while the rest is restored:
//  - clue:    the spent clue is returned.
//  - discard: the gained clue is removed, the pace is restored and the draw is
//             undone.
//  - play:    the clue granted for a rank-0 card is removed, the draw is undone,
//             and the stack and score are restored.
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert(const BacktrackAction &action) {
  decr_turn();

  // Undoes the draw() that was part of a play or discard.
  const auto undo_draw = [this, &action]() {
    if (action.multiplicity > 0) {
      revert_draw(action.index, action.discarded);
      return;
    }
    // draw() returned 0, i.e. the pile was empty and no card entered the hand.
    // There is nothing to put back; only the position that draw() recorded
    // for the card leaving the hand has to be restored.
    const Card &card = action.discarded;
    if (_stacks[card.suit] > card.rank) {
      _card_positions[card] = _turn;
    }
  };

  switch (action.type) {
    case ActionType::clue:
      assert(_num_clues < max_num_clues);
      _num_clues++;
      break;
    case ActionType::discard:
      assert(_num_clues > 0);
      _num_clues--;
      _pace++;
      undo_draw();
      break;
    case ActionType::play:
      if (action.discarded.rank == 0) {
        _num_clues--;
      }
      // The draw is undone before the stack is restored, mirroring play(),
      // which advances the stack before drawing.
      undo_draw();
      _stacks[action.discarded.suit]++;
      _score--;
      break;
    default:
      break;
  }
}
2023-08-06 11:54:57 +02:00
// Raises the enclosing function's local `best_probability` to `new_probability`
// if that is larger, and returns from the enclosing function as soon as the
// best value is exactly 1 (nothing can beat a certain win).
// Wrapped in do { } while (false) so that it behaves as one statement, e.g.
// under an unbraced if/else; invoke it with a trailing semicolon.
#define UPDATE_PROBABILITY(new_probability) \
  do { \
    best_probability = std::max(best_probability, (new_probability)); \
    if (best_probability == 1) { \
      return best_probability; \
    } \
  } while (false)
// Exhaustively searches the game tree from the current state and returns the
// best winning probability found for the player to move.
//
// Plays of playable cards, one discard of a trash card, and a clue (stall) are
// considered. While the draw pile is non-empty, a move that draws is averaged
// over all draw-pile entries, weighted by their multiplicity. The state is
// restored before returning.
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
double HanabiState<num_suits, num_players, hand_size>::backtrack() {
  // NOTE(review): prints every visited state; presumably a debugging aid.
  std::cout << *this << std::endl;

  if (_score == 5 * num_suits) {
    return 1;
  }
  if(_pace < 0 || endgame_turns_left == 0) {
    return 0;
  }

  // TODO: Have some endgame analysis here?

  double best_probability = 0;
  // Work on a copy: the hand is modified while moves are applied and reverted.
  const std::array<Card, hand_size> hand = _hands[_turn];

  // First, check for playables
  for(std::uint8_t index = 0; index < hand_size; index++) {
    if (!is_playable(hand[index])) {
      continue;
    }
    double probability_for_this_play = 0;
    if (_draw_pile.empty()) {
      // No card is drawn, so there is exactly one outcome to evaluate.
      // Averaging over zero draw-pile entries would compute 0/0 instead.
      const BacktrackAction action = play(index);
      probability_for_this_play = backtrack();
      // Undo by hand: no draw took place, so only the turn, the clue granted
      // for a rank-0 card, the stack and the score need to be restored.
      decr_turn();
      if (action.discarded.rank == 0) {
        _num_clues--;
      }
      _stacks[action.discarded.suit]++;
      _score--;
    } else {
      double sum_of_probabilities = 0;
      uint8_t sum_of_mults = 0;
      // Each play draws the front entry and revert() puts it at the back, so
      // the loop cycles once through all entries of the draw pile.
      for(size_t i = 0; i < _draw_pile.size(); i++) {
        BacktrackAction action = play(index);
        sum_of_probabilities += backtrack() * action.multiplicity;
        sum_of_mults += action.multiplicity;
        revert(action);
        assert(sum_of_mults <= _weighted_draw_pile_size);
      }
      assert(sum_of_mults == _weighted_draw_pile_size);
      probability_for_this_play = sum_of_probabilities / _weighted_draw_pile_size;
    }
    UPDATE_PROBABILITY(probability_for_this_play);
  }

  // Check for discards now. discard() requires the clue count to be below the
  // maximum, so discarding is not an option at full clues.
  if(_pace > 0 && _num_clues < max_num_clues) {
    for(std::uint8_t index = 0; index < hand_size; index++) {
      if (!is_trash(hand[index])) {
        continue;
      }
      double probability_discard = 0;
      if (_draw_pile.empty()) {
        // As for plays: a single outcome, undone by hand because nothing was
        // drawn. draw() does not touch the position of a trash card, so the
        // turn, clue count and pace are all that changed.
        [[maybe_unused]] const BacktrackAction action = discard(index);
        probability_discard = backtrack();
        decr_turn();
        _num_clues--;
        _pace++;
      } else {
        double sum_of_probabilities = 0;
        for(size_t i = 0; i < _draw_pile.size(); i++) {
          BacktrackAction action = discard(index);
          sum_of_probabilities += backtrack() * action.multiplicity;
          revert(action);
        }
        probability_discard = sum_of_probabilities / _weighted_draw_pile_size;
      }
      UPDATE_PROBABILITY(probability_discard);
      // All discards are equivalent, do not continue searching for different trash
      break;
    }
  }

  // Last option is to stall
  if(_num_clues > 0) {
    BacktrackAction action = clue();
    const double probability_stall = backtrack();
    revert(action);
    UPDATE_PROBABILITY(probability_stall);
  }

  return best_probability;
}
2023-08-05 11:55:46 +02:00
} // namespace Hanabi