keep track of probabilities in tablebase

This commit is contained in:
Maximilian Keßler 2023-08-07 12:48:25 +02:00
parent f29e3d1202
commit c394338c24
Signed by: max
GPG key ID: BCC5A619923C0BA5
3 changed files with 111 additions and 53 deletions

View file

@ -5,6 +5,7 @@
#include <cstdint> #include <cstdint>
#include <algorithm> #include <algorithm>
#include <cstddef> #include <cstddef>
#include <unordered_map>
#include <bitset> #include <bitset>
#include <limits> #include <limits>
#include <optional> #include <optional>
@ -162,6 +163,7 @@ public:
[[nodiscard]] virtual bool is_playable(const Card& card) const = 0; [[nodiscard]] virtual bool is_playable(const Card& card) const = 0;
[[nodiscard]] virtual std::uint64_t enumerated_states() const = 0; [[nodiscard]] virtual std::uint64_t enumerated_states() const = 0;
[[nodiscard]] virtual std::unordered_map<unsigned long, double> visited_states() const = 0;
virtual void normalize_draw_and_positions() = 0; virtual void normalize_draw_and_positions() = 0;
@ -194,6 +196,7 @@ public:
[[nodiscard]] bool is_playable(const Card& card) const final; [[nodiscard]] bool is_playable(const Card& card) const final;
[[nodiscard]] std::uint64_t enumerated_states() const final; [[nodiscard]] std::uint64_t enumerated_states() const final;
[[nodiscard]] std::unordered_map<unsigned long, double> visited_states() const final;
void normalize_draw_and_positions() final; void normalize_draw_and_positions() final;
@ -232,13 +235,17 @@ private:
// This will indicate whether cards that were in hands initially still are in hands // This will indicate whether cards that were in hands initially still are in hands
std::bitset<num_players * hand_size> _card_positions_hands; std::bitset<num_players * hand_size> _card_positions_hands;
uint8_t _num_useful_cards_in_starting_hands;
size_t _num_useful_cards_in_starting_hands;
size_t _initial_draw_pile_size;
// further statistics that we might want to keep track of // further statistics that we might want to keep track of
uint8_t _pace{}; uint8_t _pace{};
uint8_t _score{}; uint8_t _score{};
std::uint64_t _enumerated_states {}; std::uint64_t _enumerated_states {};
std::unordered_map<unsigned long, double> _position_tablebase;
}; };
template <std::size_t num_suits, player_t num_players, std::size_t hand_size> template <std::size_t num_suits, player_t num_players, std::size_t hand_size>

View file

@ -73,6 +73,7 @@ namespace Hanabi {
_card_positions_draw(), _card_positions_draw(),
_card_positions_hands(), _card_positions_hands(),
_num_useful_cards_in_starting_hands(0), _num_useful_cards_in_starting_hands(0),
_initial_draw_pile_size(0),
_pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)), _pace(deck.size() - 5 * num_suits - num_players * (hand_size - 1)),
_score(0), _score(0),
_enumerated_states(0) { _enumerated_states(0) {
@ -301,7 +302,8 @@ namespace Hanabi {
ASSERT(_card_positions_hands[discarded_card.local_index] == false); ASSERT(_card_positions_hands[discarded_card.local_index] == false);
_card_positions_hands[discarded_card.local_index] = true; _card_positions_hands[discarded_card.local_index] = true;
} else { } else {
auto hand_card_it = std::ranges::find(_card_positions_draw[discarded_card.local_index], trash_or_play_stack); auto hand_card_it = std::ranges::find(_card_positions_draw[discarded_card.local_index],
trash_or_play_stack);
ASSERT(hand_card_it != _card_positions_draw[discarded_card.local_index].end()); ASSERT(hand_card_it != _card_positions_draw[discarded_card.local_index].end());
*hand_card_it = _turn; *hand_card_it = _turn;
} }
@ -344,6 +346,7 @@ namespace Hanabi {
} }
} }
} }
_initial_draw_pile_size = _weighted_draw_pile_size;
// Prepare cards in hands // Prepare cards in hands
for (player_t player = 0; player < num_players; player++) { for (player_t player = 0; player < num_players; player++) {
@ -372,7 +375,8 @@ namespace Hanabi {
} }
template<suit_t num_suits, player_t num_players, hand_index_t hand_size> template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::revert_play(const BacktrackAction& action, bool was_on_8_clues) { void
HanabiState<num_suits, num_players, hand_size>::revert_play(const BacktrackAction &action, bool was_on_8_clues) {
ASSERT(!was_on_8_clues or _num_clues == 8); ASSERT(!was_on_8_clues or _num_clues == 8);
decr_turn(); decr_turn();
if (action.discarded.rank == 0 and not was_on_8_clues) { if (action.discarded.rank == 0 and not was_on_8_clues) {
@ -399,15 +403,26 @@ namespace Hanabi {
_num_clues++; _num_clues++;
} }
// Stores best_probability for the current state in the position tablebase and returns it from
// the enclosing function. The expansion site must have `id_of_state` and `best_probability`
// in scope and must be a member function with access to `_position_tablebase`.
// If the state was stored before, the stored value is only asserted to match, never overwritten.
// try_emplace performs the lookup and the insertion in a single step. The surrounding braces make
// the expansion one statement, so the `return` cannot escape an unbraced `if` guarding the macro.
#define RETURN_PROBABILITY \
    { \
        [[maybe_unused]] const auto [tablebase_entry, newly_inserted] = \
            _position_tablebase.try_emplace(id_of_state, best_probability); \
        ASSERT(newly_inserted or tablebase_entry->second == best_probability); \
        return best_probability; \
    }
// Folds new_probability into the running maximum `best_probability` of the enclosing function
// and, once that maximum reaches exactly 1, leaves the function through RETURN_PROBABILITY
// (nothing can improve on a certain win, so remaining actions need not be explored).
// The outer braces make the expansion a single statement, so both the update and the early
// exit stay under the same condition when the macro is used in an unbraced `if`.
#define UPDATE_PROBABILITY(new_probability) \
    { \
        best_probability = std::max(best_probability, new_probability); \
        if (best_probability == 1) { \
            RETURN_PROBABILITY; \
        } \
    }
template<suit_t num_suits, player_t num_players, hand_index_t hand_size> template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
double HanabiState<num_suits, num_players, hand_size>::backtrack(size_t depth) { double HanabiState<num_suits, num_players, hand_size>::backtrack(size_t depth) {
_enumerated_states++; _enumerated_states++;
const unsigned long id_of_state = unique_id();
if (_score == 5 * num_suits) { if (_score == 5 * num_suits) {
return 1; return 1;
} }
@ -485,13 +500,48 @@ namespace Hanabi {
UPDATE_PROBABILITY(probability_stall); UPDATE_PROBABILITY(probability_stall);
} }
_position_tablebase[id_of_state] = best_probability;
return best_probability; return best_probability;
} }
template<suit_t num_suits, player_t num_players, hand_index_t hand_size> template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::unique_id() const { std::uint64_t HanabiState<num_suits, num_players, hand_size>::unique_id() const {
unsigned long id = _card_positions_hands.to_ulong(); unsigned long id = 0;
// encode all positions of cards that started in draw pile
for(const auto & positions: _card_positions_draw) {
for(player_t player : positions) {
id *= num_players + 2;
id += player;
}
}
// encode number of clues
id *= max_num_clues + 1;
id += _num_clues;
// encode draw pile size
id *= _initial_draw_pile_size;
id += _weighted_draw_pile_size;
// encode positions of cards that started in hands
id = id << _num_useful_cards_in_starting_hands;
id += _card_positions_hands.to_ulong();
id *= num_players;
id += _turn;
// The id is unique now, since for all relevant cards, we know their position (including if they are played),
// the number of clues, the draw pile size and whose turn it is.
// This already uniquely determines the current players position, assuming that we never discard good cards
// (and only play them)
return id; return id;
} }
// Returns the position tablebase collected so far: a map from state id to the probability
// stored for that state.
// The map is returned by value, so every call copies the whole table.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::unordered_map<unsigned long, double> HanabiState<num_suits, num_players, hand_size>::visited_states() const {
return _position_tablebase;
}
} // namespace Hanabi } // namespace Hanabi

View file

@ -21,6 +21,7 @@ namespace Hanabi {
std::cout.precision(10); std::cout.precision(10);
std::cout << "Probability with optimal play: " << res << std::endl; std::cout << "Probability with optimal play: " << res << std::endl;
std::cout << "Enumerated " << game->enumerated_states() << " states" << std::endl; std::cout << "Enumerated " << game->enumerated_states() << " states" << std::endl;
std::cout << "Visited " << game->visited_states().size() << " unique game states. " << std::endl;
} }
void print_sizes() { void print_sizes() {