From a4ee7ace1dbf0998f93b27c0b3c856edf6c99f2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20Ke=C3=9Fler?= Date: Thu, 10 Aug 2023 12:06:13 +0200 Subject: [PATCH] simplify backtrack interface by storing actions in stack --- game_state.h | 46 ++++++++++++-------- game_state.hpp | 116 ++++++++++++++++++++++++++++++------------------- 2 files changed, 101 insertions(+), 61 deletions(-) diff --git a/game_state.h b/game_state.h index 4a37f3b..7aa04a4 100644 --- a/game_state.h +++ b/game_state.h @@ -2,6 +2,7 @@ #define DYNAMIC_PROGRAM_GAME_STATE_H #include +#include #include #include #include @@ -80,6 +81,7 @@ constexpr Card y1 = {1, 1}; constexpr Card y2 = {1, 2}; constexpr Card y3 = {1, 3}; constexpr Card y4 = {1, 4}; +constexpr Card unknown_card = {0, 6}; /** * To store: @@ -144,21 +146,29 @@ enum class ActionType { }; struct BacktrackAction { - // The card that was discarded or played - Card discarded{}; - // Index of card in hand that was discarded or played - hand_index_t index{}; - // Multiplicity of new draw (needed for probability calculations) - hand_index_t multiplicity{}; + BacktrackAction(ActionType action_type, Card discarded_or_played, hand_index_t index); + + ActionType action_type{}; + // The card that was discarded or played + Card discarded{}; + // Index of card in hand that was discarded or played + hand_index_t index{}; }; +/** Would like to have 2 versions: + * All: + * - support playing cards, querying basic information + * - support going back, but with a different interface: efficient (needs arguments, does not store) or using a stack + * + */ + class HanabiStateIF { public: virtual probability_t backtrack(size_t depth) = 0; virtual void clue() = 0; - virtual BacktrackAction discard(hand_index_t index) = 0; - virtual BacktrackAction play(hand_index_t index) = 0; + virtual void discard(hand_index_t index) = 0; + virtual void play(hand_index_t index) = 0; [[nodiscard]] virtual hand_index_t find_card_in_hand(const Card& card) const = 0; [[nodiscard]] virtual bool is_trash(const Card& card) const = 0; @@ -187,12 +197,12 @@ public: probability_t backtrack(size_t depth) final; void clue() final; - BacktrackAction play(hand_index_t index) final; - BacktrackAction discard(hand_index_t index) final; + void play(hand_index_t index) final; + void discard(hand_index_t index) final; void revert_clue(); - void revert_play(const BacktrackAction &action, bool was_on_8_clues); - void revert_discard(const BacktrackAction &action); + void revert_play(bool was_on_8_clues); + void revert_discard(); [[nodiscard]] hand_index_t find_card_in_hand(const Card& card) const final; [[nodiscard]] bool is_trash(const Card& card) const final; @@ -210,10 +220,10 @@ protected: void print(std::ostream& os) const final; private: - template BacktrackAction play_and_potentially_update(hand_index_t index); - template BacktrackAction discard_and_potentially_update(hand_index_t index); + template unsigned long play_and_potentially_update(hand_index_t index); + template unsigned long discard_and_potentially_update(hand_index_t index); - template hand_index_t draw(hand_index_t index); + template unsigned long draw(hand_index_t index); void revert_draw(hand_index_t index, Card discarded_card); void incr_turn(); @@ -241,8 +251,8 @@ private: // This will indicate whether cards that were in hands initially still are in hands std::bitset _card_positions_hands; - size_t _num_useful_cards_in_starting_hands; - size_t _initial_draw_pile_size; + size_t _num_useful_cards_in_starting_hands{}; + size_t _initial_draw_pile_size{}; // further statistics that we might want to keep track of int8_t _pace{}; @@ -251,6 +261,8 @@ private: std::uint64_t _enumerated_states {}; std::unordered_map _position_tablebase; + + std::stack _actions_log; }; template diff --git a/game_state.hpp b/game_state.hpp index 316279c..899977c 100644 --- a/game_state.hpp +++ b/game_state.hpp @@ -61,6 +61,14 @@ namespace Hanabi { return _array[card.suit][card.rank]; }; + BacktrackAction::BacktrackAction( + Hanabi::ActionType action_type, Hanabi::Card discarded_or_played, Hanabi::hand_index_t index + ): + action_type(action_type), + discarded(discarded_or_played), + index(index) { + } + template HanabiState::HanabiState(const std::vector &deck): _turn(0), @@ -95,6 +103,7 @@ namespace Hanabi { ASSERT(_num_clues > 0); --_num_clues; + _actions_log.emplace(ActionType::clue, unknown_card, 0); incr_turn(); } @@ -130,56 +139,58 @@ namespace Hanabi { } template - BacktrackAction HanabiState::play(Hanabi::hand_index_t index) { + void HanabiState::play(Hanabi::hand_index_t index) { const Card card = _hands[_turn][index]; if (!is_playable(card)) { - BacktrackAction ret{card, index, draw(index)}; + draw(index); incr_turn(); - return ret; + return; } - return play_and_potentially_update(index); + play_and_potentially_update(index); } template template - BacktrackAction HanabiState::play_and_potentially_update(hand_index_t index) { + unsigned long HanabiState::play_and_potentially_update(hand_index_t index) { ASSERT(index < _hands[_turn].size()); - const Card card = _hands[_turn][index]; - ASSERT(is_playable(card)); + const Card played_card = _hands[_turn][index]; + ASSERT(is_playable(played_card)); - --_stacks[card.suit]; + --_stacks[played_card.suit]; _score++; - if (card.rank == 0 and _num_clues < max_num_clues) { - // update clues if we played the last card of a stack + if (played_card.rank == 0 and _num_clues < max_num_clues) { + // update clues if we played the last played_card of a stack _num_clues++; } - BacktrackAction ret{card, index, draw(index)}; + unsigned long multiplicity = draw(index); + _actions_log.emplace(ActionType::play, played_card, index); incr_turn(); - return ret; + return multiplicity; } template - BacktrackAction HanabiState::discard(std::uint8_t index) { - return discard_and_potentially_update(index); + void HanabiState::discard(std::uint8_t index) { + discard_and_potentially_update(index); } template template - BacktrackAction HanabiState::discard_and_potentially_update(hand_index_t index) { + unsigned long HanabiState::discard_and_potentially_update(hand_index_t index) { ASSERT(index < _hands[_turn].size()); ASSERT(_num_clues != max_num_clues); - const Card discarded = _hands[_turn][index]; + const Card discarded_card = _hands[_turn][index]; _num_clues++; _pace--; - BacktrackAction ret{discarded, index, draw(index)}; + unsigned long multiplicity = draw(index); + _actions_log.emplace(ActionType::discard, discarded_card, index); incr_turn(); - return ret; + return multiplicity; } template @@ -217,7 +228,7 @@ namespace Hanabi { template template - std::uint8_t HanabiState::draw(uint8_t index) { + unsigned long HanabiState::draw(uint8_t index) { ASSERT(index < _hands[_turn].size()); // update card position of the card we are about to discard @@ -377,45 +388,62 @@ namespace Hanabi { template void - HanabiState::revert_play(const BacktrackAction &action, bool was_on_8_clues) { + HanabiState::revert_play(bool was_on_8_clues) { + const BacktrackAction last_action = _actions_log.top(); + _actions_log.pop(); + ASSERT(last_action.action_type == ActionType::play); ASSERT(!was_on_8_clues or _num_clues == 8); + decr_turn(); - if (action.discarded.rank == 0 and not was_on_8_clues) { + if (last_action.discarded.rank == 0 and not was_on_8_clues) { _num_clues--; } - revert_draw(action.index, action.discarded); - _stacks[action.discarded.suit]++; + revert_draw(last_action.index, last_action.discarded); + _stacks[last_action.discarded.suit]++; _score--; } template - void HanabiState::revert_discard(const BacktrackAction &action) { + void HanabiState::revert_discard() { + const BacktrackAction last_action = _actions_log.top(); + _actions_log.pop(); + + ASSERT(last_action.action_type == ActionType::discard); + decr_turn(); ASSERT(_num_clues > 0); + _num_clues--; _pace++; - revert_draw(action.index, action.discarded); + + revert_draw(last_action.index, last_action.discarded); } template void HanabiState::revert_clue() { + const BacktrackAction last_action = _actions_log.top(); + _actions_log.pop(); + + ASSERT(last_action.action_type == ActionType::clue); + decr_turn(); ASSERT(_num_clues < max_num_clues); + _num_clues++; } #define RETURN_PROBABILITY \ if (_position_tablebase.contains(id_of_state)) { \ ASSERT(_position_tablebase[id_of_state] == best_probability); \ - } \ - _position_tablebase[id_of_state] = best_probability; \ - \ + } \ + _position_tablebase[id_of_state] = best_probability; \ + \ return best_probability; - #define UPDATE_PROBABILITY(new_probability) \ - best_probability = std::max(best_probability, new_probability); \ - if (best_probability == 1) { \ - RETURN_PROBABILITY; \ + #define UPDATE_PROBABILITY(new_probability) \ + best_probability = std::max(best_probability, new_probability); \ + if (best_probability == 1) { \ + RETURN_PROBABILITY; \ } template @@ -444,19 +472,19 @@ namespace Hanabi { if(is_playable(hand[index])) { if (_draw_pile.empty()) { bool on_8_clues = _num_clues == 8; - BacktrackAction action = play_and_potentially_update(index); + play_and_potentially_update(index); const probability_t probability_for_this_play = backtrack(depth + 1); - revert_play(action, on_8_clues); + revert_play(on_8_clues); UPDATE_PROBABILITY(probability_for_this_play); } else { probability_t sum_of_probabilities = 0; uint8_t sum_of_mults = 0; for (size_t i = 0; i < _draw_pile.size(); i++) { bool on_8_clues = _num_clues == 8; - BacktrackAction action = play_and_potentially_update(index); - sum_of_probabilities += backtrack(depth + 1) * action.multiplicity; - sum_of_mults += action.multiplicity; - revert_play(action, on_8_clues); + const unsigned long multiplicity = play_and_potentially_update(index); + sum_of_probabilities += backtrack(depth + 1) * multiplicity; + sum_of_mults += multiplicity; + revert_play(on_8_clues); ASSERT(sum_of_mults <= _weighted_draw_pile_size); } ASSERT(sum_of_mults == _weighted_draw_pile_size); @@ -472,17 +500,17 @@ namespace Hanabi { if (is_trash(hand[index])) { probability_t sum_of_probabilities = 0; if (_draw_pile.empty()) { - BacktrackAction action = discard_and_potentially_update(index); + discard_and_potentially_update(index); const probability_t probability_for_this_discard = backtrack(depth + 1); - revert_discard(action); + revert_discard(); UPDATE_PROBABILITY(probability_for_this_discard); } else { uint8_t sum_of_mults = 0; for (size_t i = 0; i < _draw_pile.size(); i++) { - BacktrackAction action = discard_and_potentially_update(index); - sum_of_probabilities += backtrack(depth + 1) * action.multiplicity; - sum_of_mults += action.multiplicity; - revert_discard(action); + const unsigned long multiplicity = discard_and_potentially_update(index); + sum_of_probabilities += backtrack(depth + 1) * multiplicity; + sum_of_mults += multiplicity; + revert_discard(); } ASSERT(sum_of_mults == _weighted_draw_pile_size); const probability_t probability_discard = sum_of_probabilities / _weighted_draw_pile_size;