keep track of probabilities in tablebase
This commit is contained in:
parent
f29e3d1202
commit
c394338c24
3 changed files with 111 additions and 53 deletions
|
@ -5,6 +5,7 @@
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
#include <unordered_map>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
@ -162,6 +163,7 @@ public:
|
||||||
[[nodiscard]] virtual bool is_playable(const Card& card) const = 0;
|
[[nodiscard]] virtual bool is_playable(const Card& card) const = 0;
|
||||||
|
|
||||||
[[nodiscard]] virtual std::uint64_t enumerated_states() const = 0;
|
[[nodiscard]] virtual std::uint64_t enumerated_states() const = 0;
|
||||||
|
[[nodiscard]] virtual std::unordered_map<unsigned long, double> visited_states() const = 0;
|
||||||
|
|
||||||
virtual void normalize_draw_and_positions() = 0;
|
virtual void normalize_draw_and_positions() = 0;
|
||||||
|
|
||||||
|
@ -194,6 +196,7 @@ public:
|
||||||
[[nodiscard]] bool is_playable(const Card& card) const final;
|
[[nodiscard]] bool is_playable(const Card& card) const final;
|
||||||
|
|
||||||
[[nodiscard]] std::uint64_t enumerated_states() const final;
|
[[nodiscard]] std::uint64_t enumerated_states() const final;
|
||||||
|
[[nodiscard]] std::unordered_map<unsigned long, double> visited_states() const final;
|
||||||
|
|
||||||
void normalize_draw_and_positions() final;
|
void normalize_draw_and_positions() final;
|
||||||
|
|
||||||
|
@ -232,13 +235,17 @@ private:
|
||||||
|
|
||||||
// This will indicate whether cards that were in hands initially still are in hands
|
// This will indicate whether cards that were in hands initially still are in hands
|
||||||
std::bitset<num_players * hand_size> _card_positions_hands;
|
std::bitset<num_players * hand_size> _card_positions_hands;
|
||||||
uint8_t _num_useful_cards_in_starting_hands;
|
|
||||||
|
size_t _num_useful_cards_in_starting_hands;
|
||||||
|
size_t _initial_draw_pile_size;
|
||||||
|
|
||||||
// further statistics that we might want to keep track of
|
// further statistics that we might want to keep track of
|
||||||
uint8_t _pace{};
|
uint8_t _pace{};
|
||||||
uint8_t _score{};
|
uint8_t _score{};
|
||||||
|
|
||||||
std::uint64_t _enumerated_states {};
|
std::uint64_t _enumerated_states {};
|
||||||
|
|
||||||
|
std::unordered_map<unsigned long, double> _position_tablebase;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
template <std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
||||||
|
|
154
game_state.hpp
154
game_state.hpp
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
namespace Hanabi {
|
namespace Hanabi {
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, HanabiStateIF const& hanabi_state) {
|
std::ostream &operator<<(std::ostream &os, HanabiStateIF const &hanabi_state) {
|
||||||
hanabi_state.print(os);
|
hanabi_state.print(os);
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
@ -29,7 +29,7 @@ namespace Hanabi {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<size_t num_suits>
|
template<size_t num_suits>
|
||||||
std::ostream& operator<<(std::ostream &os, const Stacks<num_suits> &stacks) {
|
std::ostream &operator<<(std::ostream &os, const Stacks<num_suits> &stacks) {
|
||||||
for (size_t i = 0; i < stacks.size() - 1; i++) {
|
for (size_t i = 0; i < stacks.size() - 1; i++) {
|
||||||
os << starting_card_rank - stacks[i] << ", ";
|
os << starting_card_rank - stacks[i] << ", ";
|
||||||
}
|
}
|
||||||
|
@ -52,36 +52,37 @@ namespace Hanabi {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<suit_t num_suits, typename T>
|
template<suit_t num_suits, typename T>
|
||||||
const T& CardArray<num_suits, T>::operator[](const Card &card) const {
|
const T &CardArray<num_suits, T>::operator[](const Card &card) const {
|
||||||
return _array[card.suit][card.rank];
|
return _array[card.suit][card.rank];
|
||||||
};
|
};
|
||||||
|
|
||||||
template<suit_t num_suits, typename T>
|
template<suit_t num_suits, typename T>
|
||||||
T& CardArray<num_suits, T>::operator[](const Card &card) {
|
T &CardArray<num_suits, T>::operator[](const Card &card) {
|
||||||
return _array[card.suit][card.rank];
|
return _array[card.suit][card.rank];
|
||||||
};
|
};
|
||||||
|
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> &deck):
|
HanabiState<num_suits, num_players, hand_size>::HanabiState(const std::vector<Card> &deck):
|
||||||
_turn(0),
|
_turn(0),
|
||||||
_num_clues(max_num_clues),
|
_num_clues(max_num_clues),
|
||||||
_weighted_draw_pile_size(deck.size()),
|
_weighted_draw_pile_size(deck.size()),
|
||||||
_stacks(),
|
_stacks(),
|
||||||
_hands(),
|
_hands(),
|
||||||
_draw_pile(),
|
_draw_pile(),
|
||||||
_endgame_turns_left(no_endgame),
|
_endgame_turns_left(no_endgame),
|
||||||
_card_positions_draw(),
|
_card_positions_draw(),
|
||||||
_card_positions_hands(),
|
_card_positions_hands(),
|
||||||
_num_useful_cards_in_starting_hands(0),
|
_num_useful_cards_in_starting_hands(0),
|
||||||
_pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)),
|
_initial_draw_pile_size(0),
|
||||||
_score(0),
|
_pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)),
|
||||||
_enumerated_states(0) {
|
_score(0),
|
||||||
|
_enumerated_states(0) {
|
||||||
std::ranges::fill(_stacks, starting_card_rank);
|
std::ranges::fill(_stacks, starting_card_rank);
|
||||||
for(const Card& card: deck) {
|
for (const Card &card: deck) {
|
||||||
_draw_pile.push_back({card, 1});
|
_draw_pile.push_back({card, 1});
|
||||||
}
|
}
|
||||||
for(player_t player = 0; player < num_players; player++) {
|
for (player_t player = 0; player < num_players; player++) {
|
||||||
for(std::uint8_t index = 0; index < hand_size; index++) {
|
for (std::uint8_t index = 0; index < hand_size; index++) {
|
||||||
draw<false>(index);
|
draw<false>(index);
|
||||||
}
|
}
|
||||||
incr_turn();
|
incr_turn();
|
||||||
|
@ -100,7 +101,7 @@ namespace Hanabi {
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
void HanabiState<num_suits, num_players, hand_size>::incr_turn() {
|
void HanabiState<num_suits, num_players, hand_size>::incr_turn() {
|
||||||
_turn = (_turn + 1) % num_players;
|
_turn = (_turn + 1) % num_players;
|
||||||
if(_endgame_turns_left != no_endgame) {
|
if (_endgame_turns_left != no_endgame) {
|
||||||
_endgame_turns_left--;
|
_endgame_turns_left--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -184,12 +185,12 @@ namespace Hanabi {
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
std::uint8_t HanabiState<num_suits, num_players, hand_size>::find_card_in_hand(
|
std::uint8_t HanabiState<num_suits, num_players, hand_size>::find_card_in_hand(
|
||||||
const Hanabi::Card &card) const {
|
const Hanabi::Card &card) const {
|
||||||
for(std::uint8_t i = 0; i < hand_size; i++) {
|
for (std::uint8_t i = 0; i < hand_size; i++) {
|
||||||
if(_hands[_turn][i].rank == card.rank && _hands[_turn][i].suit == card.suit) {
|
if (_hands[_turn][i].rank == card.rank && _hands[_turn][i].suit == card.suit) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
|
@ -220,7 +221,7 @@ namespace Hanabi {
|
||||||
ASSERT(index < _hands[_turn].size());
|
ASSERT(index < _hands[_turn].size());
|
||||||
|
|
||||||
// update card position of the card we are about to discard
|
// update card position of the card we are about to discard
|
||||||
if constexpr(update_card_positions) {
|
if constexpr (update_card_positions) {
|
||||||
const Card discarded = _hands[_turn][index];
|
const Card discarded = _hands[_turn][index];
|
||||||
if (!discarded.initial_trash) {
|
if (!discarded.initial_trash) {
|
||||||
if (discarded.in_starting_hand) {
|
if (discarded.in_starting_hand) {
|
||||||
|
@ -247,7 +248,7 @@ namespace Hanabi {
|
||||||
_draw_pile.back().multiplicity--;
|
_draw_pile.back().multiplicity--;
|
||||||
}
|
}
|
||||||
|
|
||||||
if constexpr(update_card_positions) {
|
if constexpr (update_card_positions) {
|
||||||
// update card position of the drawn card
|
// update card position of the drawn card
|
||||||
if (!draw.card.initial_trash) {
|
if (!draw.card.initial_trash) {
|
||||||
ASSERT(draw.card.in_starting_hand == false);
|
ASSERT(draw.card.in_starting_hand == false);
|
||||||
|
@ -259,7 +260,7 @@ namespace Hanabi {
|
||||||
|
|
||||||
_hands[_turn][index] = draw.card;
|
_hands[_turn][index] = draw.card;
|
||||||
|
|
||||||
if(_draw_pile.empty()) {
|
if (_draw_pile.empty()) {
|
||||||
// Note the +1, since we will immediately decrement this when moving to the next player
|
// Note the +1, since we will immediately decrement this when moving to the next player
|
||||||
_endgame_turns_left = num_players + 1;
|
_endgame_turns_left = num_players + 1;
|
||||||
}
|
}
|
||||||
|
@ -301,7 +302,8 @@ namespace Hanabi {
|
||||||
ASSERT(_card_positions_hands[discarded_card.local_index] == false);
|
ASSERT(_card_positions_hands[discarded_card.local_index] == false);
|
||||||
_card_positions_hands[discarded_card.local_index] = true;
|
_card_positions_hands[discarded_card.local_index] = true;
|
||||||
} else {
|
} else {
|
||||||
auto hand_card_it = std::ranges::find(_card_positions_draw[discarded_card.local_index], trash_or_play_stack);
|
auto hand_card_it = std::ranges::find(_card_positions_draw[discarded_card.local_index],
|
||||||
|
trash_or_play_stack);
|
||||||
ASSERT(hand_card_it != _card_positions_draw[discarded_card.local_index].end());
|
ASSERT(hand_card_it != _card_positions_draw[discarded_card.local_index].end());
|
||||||
*hand_card_it = _turn;
|
*hand_card_it = _turn;
|
||||||
}
|
}
|
||||||
|
@ -314,16 +316,16 @@ namespace Hanabi {
|
||||||
void HanabiState<num_suits, num_players, hand_size>::normalize_draw_and_positions() {
|
void HanabiState<num_suits, num_players, hand_size>::normalize_draw_and_positions() {
|
||||||
// Note that this function does not have to be particularly performant, we only call it once to initialize.
|
// Note that this function does not have to be particularly performant, we only call it once to initialize.
|
||||||
const Card trash = [this]() -> Card {
|
const Card trash = [this]() -> Card {
|
||||||
for(suit_t suit = 0; suit < num_suits; suit++) {
|
for (suit_t suit = 0; suit < num_suits; suit++) {
|
||||||
if(_stacks[suit] < starting_card_rank) {
|
if (_stacks[suit] < starting_card_rank) {
|
||||||
return {suit, starting_card_rank - 1, 0, false, true};
|
return {suit, starting_card_rank - 1, 0, false, true};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return {0,0};
|
return {0, 0};
|
||||||
}();
|
}();
|
||||||
|
|
||||||
CardArray<num_suits, std::uint8_t> nums_in_draw_pile;
|
CardArray<num_suits, std::uint8_t> nums_in_draw_pile;
|
||||||
for(const auto [card, multiplicity] : _draw_pile) {
|
for (const auto [card, multiplicity]: _draw_pile) {
|
||||||
if (_stacks[card.suit] > card.rank) {
|
if (_stacks[card.suit] > card.rank) {
|
||||||
nums_in_draw_pile[card] += multiplicity;
|
nums_in_draw_pile[card] += multiplicity;
|
||||||
} else {
|
} else {
|
||||||
|
@ -333,27 +335,28 @@ namespace Hanabi {
|
||||||
|
|
||||||
// Prepare draw pile
|
// Prepare draw pile
|
||||||
_draw_pile.clear();
|
_draw_pile.clear();
|
||||||
for(suit_t suit = 0; suit < num_suits; suit++) {
|
for (suit_t suit = 0; suit < num_suits; suit++) {
|
||||||
for(rank_t rank = 0; rank < starting_card_rank; rank++) {
|
for (rank_t rank = 0; rank < starting_card_rank; rank++) {
|
||||||
Card card {suit, rank, static_cast<uint8_t>(_card_positions_draw.size()), false, is_trash(card)};
|
Card card{suit, rank, static_cast<uint8_t>(_card_positions_draw.size()), false, is_trash(card)};
|
||||||
if (nums_in_draw_pile[card] > 0) {
|
if (nums_in_draw_pile[card] > 0) {
|
||||||
_draw_pile.push_back({card, nums_in_draw_pile[card]});
|
_draw_pile.push_back({card, nums_in_draw_pile[card]});
|
||||||
if(!is_trash(card)) {
|
if (!is_trash(card)) {
|
||||||
_card_positions_draw.push_back({nums_in_draw_pile[card], draw_pile});
|
_card_positions_draw.push_back({nums_in_draw_pile[card], draw_pile});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_initial_draw_pile_size = _weighted_draw_pile_size;
|
||||||
|
|
||||||
// Prepare cards in hands
|
// Prepare cards in hands
|
||||||
for(player_t player = 0; player < num_players; player++) {
|
for (player_t player = 0; player < num_players; player++) {
|
||||||
for(Card& card : _hands[player]) {
|
for (Card &card: _hands[player]) {
|
||||||
card.initial_trash = is_trash(card);
|
card.initial_trash = is_trash(card);
|
||||||
card.in_starting_hand = true;
|
card.in_starting_hand = true;
|
||||||
// Needed to check for dupes in same hand
|
// Needed to check for dupes in same hand
|
||||||
boost::container::static_vector<Card, hand_size> good_cards_in_hand;
|
boost::container::static_vector<Card, hand_size> good_cards_in_hand;
|
||||||
if(!is_trash(card)) {
|
if (!is_trash(card)) {
|
||||||
if(std::count(good_cards_in_hand.begin(), good_cards_in_hand.end(), card) > 0) {
|
if (std::count(good_cards_in_hand.begin(), good_cards_in_hand.end(), card) > 0) {
|
||||||
// This card is already in hand, so just replace the second copy by some trash
|
// This card is already in hand, so just replace the second copy by some trash
|
||||||
card = trash;
|
card = trash;
|
||||||
} else {
|
} else {
|
||||||
|
@ -366,13 +369,14 @@ namespace Hanabi {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_card_positions_hands.reset();
|
_card_positions_hands.reset();
|
||||||
for(size_t i = 0; i < _num_useful_cards_in_starting_hands; i++) {
|
for (size_t i = 0; i < _num_useful_cards_in_starting_hands; i++) {
|
||||||
_card_positions_hands[i] = true;
|
_card_positions_hands[i] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
void HanabiState<num_suits, num_players, hand_size>::revert_play(const BacktrackAction& action, bool was_on_8_clues) {
|
void
|
||||||
|
HanabiState<num_suits, num_players, hand_size>::revert_play(const BacktrackAction &action, bool was_on_8_clues) {
|
||||||
ASSERT(!was_on_8_clues or _num_clues == 8);
|
ASSERT(!was_on_8_clues or _num_clues == 8);
|
||||||
decr_turn();
|
decr_turn();
|
||||||
if (action.discarded.rank == 0 and not was_on_8_clues) {
|
if (action.discarded.rank == 0 and not was_on_8_clues) {
|
||||||
|
@ -384,7 +388,7 @@ namespace Hanabi {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
void HanabiState<num_suits, num_players, hand_size>::revert_discard(const BacktrackAction& action) {
|
void HanabiState<num_suits, num_players, hand_size>::revert_discard(const BacktrackAction &action) {
|
||||||
decr_turn();
|
decr_turn();
|
||||||
ASSERT(_num_clues > 0);
|
ASSERT(_num_clues > 0);
|
||||||
_num_clues--;
|
_num_clues--;
|
||||||
|
@ -399,15 +403,26 @@ namespace Hanabi {
|
||||||
_num_clues++;
|
_num_clues++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stores `best_probability` for the state `id_of_state` in `_position_tablebase` and returns it
// from the enclosing function. All three names must be in scope at the point of expansion.
// If the state already has an entry, the stored value is left untouched and only asserted to
// agree with the freshly computed one. A single try_emplace replaces the former
// contains() + operator[] lookups, and the braces keep the helper bindings local to the expansion.
#define RETURN_PROBABILITY \
    { \
        [[maybe_unused]] const auto [tablebase_entry, newly_inserted] = \
                _position_tablebase.try_emplace(id_of_state, best_probability); \
        ASSERT(newly_inserted || tablebase_entry->second == best_probability); \
        return best_probability; \
    }

// Raises `best_probability` to `new_probability` if that is larger. As soon as a certain win
// (probability 1) is found, no further option can improve on it, so the result is recorded and
// returned immediately via RETURN_PROBABILITY.
#define UPDATE_PROBABILITY(new_probability) \
    best_probability = std::max(best_probability, (new_probability)); \
    if (best_probability == 1) { \
        RETURN_PROBABILITY; \
    }
|
||||||
|
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
double HanabiState<num_suits, num_players, hand_size>::backtrack(size_t depth) {
|
double HanabiState<num_suits, num_players, hand_size>::backtrack(size_t depth) {
|
||||||
_enumerated_states++;
|
_enumerated_states++;
|
||||||
|
const unsigned long id_of_state = unique_id();
|
||||||
|
|
||||||
if (_score == 5 * num_suits) {
|
if (_score == 5 * num_suits) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -485,13 +500,48 @@ namespace Hanabi {
|
||||||
UPDATE_PROBABILITY(probability_stall);
|
UPDATE_PROBABILITY(probability_stall);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_position_tablebase[id_of_state] = best_probability;
|
||||||
return best_probability;
|
return best_probability;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
std::uint64_t HanabiState<num_suits, num_players, hand_size>::unique_id() const {
|
std::uint64_t HanabiState<num_suits, num_players, hand_size>::unique_id() const {
|
||||||
unsigned long id = _card_positions_hands.to_ulong();
|
unsigned long id = 0;
|
||||||
|
|
||||||
|
// encode all positions of cards that started in draw pile
|
||||||
|
for(const auto & positions: _card_positions_draw) {
|
||||||
|
for(player_t player : positions) {
|
||||||
|
id *= num_players + 2;
|
||||||
|
id += player;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// encode number of clues
|
||||||
|
id *= max_num_clues + 1;
|
||||||
|
id += _num_clues;
|
||||||
|
|
||||||
|
// encode draw pile size
|
||||||
|
id *= _initial_draw_pile_size;
|
||||||
|
id += _weighted_draw_pile_size;
|
||||||
|
|
||||||
|
// encode positions of cards that started in hands
|
||||||
|
id = id << _num_useful_cards_in_starting_hands;
|
||||||
|
id += _card_positions_hands.to_ulong();
|
||||||
|
|
||||||
|
id *= num_players;
|
||||||
|
id += _turn;
|
||||||
|
|
||||||
|
// The id is unique now, since for all relevant cards, we know their position (including if they are played),
|
||||||
|
// the number of clues, the draw pile size and whose turn it is.
|
||||||
|
// This already uniquely determines the current players position, assuming that we never discard good cards
|
||||||
|
// (and only play them)
|
||||||
|
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
|
std::unordered_map<unsigned long, double> HanabiState<num_suits, num_players, hand_size>::visited_states() const {
|
||||||
|
return _position_tablebase;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Hanabi
|
} // namespace Hanabi
|
1
main.cpp
1
main.cpp
|
@ -21,6 +21,7 @@ namespace Hanabi {
|
||||||
std::cout.precision(10);
|
std::cout.precision(10);
|
||||||
std::cout << "Probability with optimal play: " << res << std::endl;
|
std::cout << "Probability with optimal play: " << res << std::endl;
|
||||||
std::cout << "Enumerated " << game->enumerated_states() << " states" << std::endl;
|
std::cout << "Enumerated " << game->enumerated_states() << " states" << std::endl;
|
||||||
|
std::cout << "Visited " << game->visited_states().size() << " unique game states. " << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_sizes() {
|
void print_sizes() {
|
||||||
|
|
Loading…
Reference in a new issue