#ifndef DYNAMIC_PROGRAM_GAME_STATE_H #define DYNAMIC_PROGRAM_GAME_STATE_H #include #include #include #include #include #include #include #include #include #include #include #include #include namespace Hanabi { using rank_t = std::uint8_t; using suit_t = std::uint8_t; using clue_t = std::int8_t; using player_t = std::uint8_t; using hand_index_t = std::uint8_t; using probability_base_type = unsigned long; using rational_probability = boost::rational; /** * Define macro * NUSE_RATIONAL_PROBABILITIES * to use floating-point arithematic for the stored probabilities * instead of rational representations */ #ifndef NUSE_RATIONAL_PROBABILITIES using probability_t = boost::rational; #else using probability_t = double; #endif inline std::ostream& print_probability(std::ostream& os, double prob); inline std::ostream& print_probability(std::ostream& os, const rational_probability& prob); template std::ostream& print_probability(std::ostream& os, const std::optional& prob); /** * We will generally assume that stacks are played from n to 0 * Playing a 0 will yield a clue * Therefore, for the default hanabi, we will play 4,3,2,1,0 in that order * on each stack. A stack with no cards played implicitly has value 5 on it * This is just easier to implement, since then the remaining number of cards * to be played is always the current number of the stack */ constexpr rank_t starting_card_rank = 5; constexpr suit_t max_suit_index = 5; constexpr size_t max_card_duplicity = 3; constexpr clue_t max_num_clues = 8; constexpr uint8_t not_in_starting_hand = std::numeric_limits::max(); constexpr hand_index_t invalid_hand_idx = std::numeric_limits::max(); // We might want to change these at runtime to adapt to other variants. // However, a global variable is used so that we can have an output operator for cards reading from here // Note that this is therefore not static so that we have external linking inline std::array suit_initials = {'r', 'y', 'g', 'b', 'p', 't'}; struct Card { suit_t suit; rank_t rank; uint8_t local_index; bool in_starting_hand; bool initial_trash; inline bool operator==(const Card &other) const; }; namespace Cards { static constexpr Card r0 = {0, 5}; static constexpr Card r1 = {0, 4}; static constexpr Card r2 = {0, 3}; static constexpr Card r3 = {0, 2}; static constexpr Card r4 = {0, 1}; static constexpr Card r5 = {0, 0}; static constexpr Card y0 = {1, 5}; static constexpr Card y1 = {1, 4}; static constexpr Card y2 = {1, 3}; static constexpr Card y3 = {1, 2}; static constexpr Card y4 = {1, 1}; static constexpr Card y5 = {1, 0}; static constexpr Card g0 = {2, 5}; static constexpr Card g1 = {2, 4}; static constexpr Card g2 = {2, 3}; static constexpr Card g3 = {2, 2}; static constexpr Card g4 = {2, 1}; static constexpr Card g5 = {2, 0}; static constexpr Card b0 = {3, 5}; static constexpr Card b1 = {3, 4}; static constexpr Card b2 = {3, 3}; static constexpr Card b3 = {3, 2}; static constexpr Card b4 = {3, 1}; static constexpr Card b5 = {3, 0}; static constexpr Card p0 = {4, 5}; static constexpr Card p1 = {4, 4}; static constexpr Card p2 = {4, 3}; static constexpr Card p3 = {4, 2}; static constexpr Card p4 = {4, 1}; static constexpr Card p5 = {4, 0}; static constexpr Card t0 = {5, 5}; static constexpr Card t1 = {5, 4}; static constexpr Card t2 = {5, 3}; static constexpr Card t3 = {5, 2}; static constexpr Card t4 = {5, 1}; static constexpr Card t5 = {5, 0}; static constexpr Card unknown = {std::numeric_limits::max(), 0}; static constexpr Card trash = {std::numeric_limits::max(), 1}; } } namespace Hanabi { inline std::string to_string(const Hanabi::Card &card); inline std::ostream &operator<<(std::ostream &os, const Card &card); /** * To store: * - Draw pile size * - Distribution of cards * - Which cards exist? * - Number of clues */ template using Stacks = std::array; template std::ostream& operator<<(std::ostream &os, const Stacks &stacks); struct CardMultiplicity { Card card; unsigned multiplicity; bool operator==(const CardMultiplicity &) const = default; }; template struct InnerCardArray { template using array_t = std::array; }; template<> struct InnerCardArray { template using array_t = std::bitset; }; template struct CardArray { using value_type = T; CardArray() = default; explicit CardArray(value_type default_val); void fill(value_type val); const value_type &operator[](const Card &card) const; value_type &operator[](const Card &card); auto operator<=>(const CardArray &) const = default; private: using inner_array_t = typename InnerCardArray::template array_t; std::array _array {}; }; enum class ActionType : std::uint8_t { play = 0, discard = 1, clue = 2, color_clue = 2, rank_clue = 3, end_game = 4, vote_terminate_players = 5, vote_terminate = 10, }; struct Action { ActionType type {}; Card card {}; }; inline std::ostream& operator<<(std::ostream& os, const Action& action); /** Would like to have 2 versions: * All: * - support playing cards, querying basic information * - support going back, but with a different interface: efficient (needs arguments, does not store) or using a stack * */ class HanabiStateIF { public: virtual void give_clue() = 0; virtual void discard(hand_index_t index) = 0; virtual void play(hand_index_t index) = 0; virtual void rotate_next_draw(const Card& card) = 0; virtual ActionType last_action_type() const = 0; virtual void revert() = 0; virtual void modify_clues(clue_t change) = 0; virtual void set_clues(clue_t clues) = 0; [[nodiscard]] virtual player_t turn() const = 0; [[nodiscard]] virtual clue_t num_clues() const = 0; [[nodiscard]] virtual unsigned score() const = 0; [[nodiscard]] virtual std::vector> hands() const = 0; [[nodiscard]] virtual std::vector cur_hand() const = 0; [[nodiscard]] virtual size_t draw_pile_size() const = 0; [[nodiscard]] virtual bool is_trash(const Card& card) const = 0; [[nodiscard]] virtual bool is_playable(const Card& card) const = 0; [[nodiscard]] virtual bool is_relative_state_initialized() const = 0; [[nodiscard]] virtual hand_index_t find_card_in_hand(const Card& card) const = 0; [[nodiscard]] virtual std::uint64_t enumerated_states() const = 0; [[nodiscard]] virtual const std::unordered_map& position_tablebase() const = 0; virtual void init_backtracking_information() = 0; virtual probability_t evaluate_state() = 0; [[nodiscard]] virtual std::optional lookup() const = 0; [[nodiscard]] virtual std::uint64_t unique_id() const = 0; [[nodiscard]] virtual std::pair, std::vector> dump_unique_id_parts() const = 0; virtual std::vector>> get_reasonable_actions() = 0; virtual std::vector>> possible_next_states(hand_index_t index, bool play) = 0; virtual ~HanabiStateIF() = default; protected: virtual void print(std::ostream& os) const = 0; friend std::ostream& operator<<(std::ostream&, HanabiStateIF const&); }; // A game mimics a game state together with a list of actions and allows to traverse the game // history by making and reverting the stored actions. struct Game { Game(std::unique_ptr state, std::vector actions, std::vector deck); unsigned cur_turn() const; void make_turn(); void revert_turn(); bool goto_draw_pile_size(size_t draw_pile_break); bool goto_turn(size_t turn); bool holds_state(); std::unique_ptr state; std::vector actions; std::vector deck; unsigned next_action; }; inline std::ostream &operator<<(std::ostream &os, HanabiStateIF const &hanabi_state); template class HanabiState : public HanabiStateIF { public: HanabiState() = default; explicit HanabiState(const std::vector& deck, uint8_t score_goal = 5 * num_suits); void give_clue() final; void discard(hand_index_t index) final; void play(hand_index_t index) final; void rotate_next_draw(const Card& card) final; ActionType last_action_type() const final; void revert() final; void modify_clues(clue_t change) final; void set_clues(clue_t clues) final; [[nodiscard]] player_t turn() const final; [[nodiscard]] clue_t num_clues() const final; [[nodiscard]] unsigned score() const final; [[nodiscard]] std::vector> hands() const final; [[nodiscard]] std::vector cur_hand() const final; [[nodiscard]] size_t draw_pile_size() const final; [[nodiscard]] hand_index_t find_card_in_hand(const Card& card) const final; [[nodiscard]] bool is_trash(const Card& card) const final; [[nodiscard]] bool is_playable(const Card& card) const final; [[nodiscard]] bool is_relative_state_initialized() const final; [[nodiscard]] std::uint64_t enumerated_states() const final; [[nodiscard]] const std::unordered_map& position_tablebase() const final; void init_backtracking_information() final; probability_t evaluate_state() final; [[nodiscard]] std::optional lookup() const final; [[nodiscard]] std::uint64_t unique_id() const final; [[nodiscard]] std::pair, std::vector> dump_unique_id_parts() const final; std::vector>> get_reasonable_actions() final; std::vector>> possible_next_states(hand_index_t index, bool play) final; auto operator<=>(const HanabiState &) const = default; protected: void print(std::ostream& os) const final; private: struct BacktrackAction { explicit BacktrackAction( ActionType action_type, Card discarded_or_played = Cards::unknown, hand_index_t index = 0, bool was_on_8_clues = false, bool strike = false ); ActionType action_type{}; // The card that was discarded or played Card discarded{}; // Index of card in hand that was discarded or played hand_index_t index{}; // Indicates whether before the action was taken, we had 8 clues. // This is important so that we know if we go back to 7 or 8 clues when we revert playing a 5 bool was_on_8_clues {false}; // Indicates whether playing this card triggered a bomb. // This cannot be deduced just from the stacks since we cannot differentiate between a card // having been played correctly or the top card of the draw pile being bombed. bool strike {false}; }; // This keeps track of the representation of the gamestate relative to some starting state // and is used for id calculation struct RelativeRepresentationData { static constexpr player_t draw_pile = num_players; static constexpr player_t discard_pile = num_players + 1; static constexpr player_t play_stack = num_players + 2; enum CardPosition : uint8_t { hand = 0, played = 1, discarded = 2 }; // List of unique non-trash cards in draw pile boost::container::static_vector good_cards_draw; // Card positions of these cards. Indexes correspond to the cards stored in _good_cards_draw vector boost::container::static_vector, 30> card_positions_draw; // This will indicate whether cards that were in hands initially still are in hand // The first n bits are used and cards are assumed to have been marked with their indices in this bitset boost::container::static_vector card_positions_hands {}; // Note this is not the same as _good_cards_draw.size(), since this accounts for multiplicities std::uint8_t initial_draw_pile_size { 0 }; // Whether we initialized the values above and marked cards accordingly bool initialized { false }; }; unsigned long discard_and_potentially_update(hand_index_t index, bool cycle = false); unsigned long play_and_potentially_update(hand_index_t index, bool cycle = false); unsigned draw(hand_index_t index, bool cycle = false, bool played = true); void revert_draw(hand_index_t index, Card discarded_card, bool cycle = false, bool played = true); void revert_clue(); void revert_discard(bool cycle = false); void revert_play(bool cycle = false); void update_tablebase(unsigned long id, probability_t probability); template void do_for_each_potential_draw(hand_index_t index, bool play, Function f); void incr_turn(); void decr_turn(); void check_draw_pile_integrity() const; static constexpr uint8_t no_endgame = std::numeric_limits::max(); // Usual game state player_t _turn{}; clue_t _num_clues{}; std::uint8_t _weighted_draw_pile_size{}; Stacks _stacks{}; std::array, num_players> _hands{}; std::list _draw_pile{}; std::uint8_t _endgame_turns_left{}; // further values of game state that are technically determined, but we update them anyway int8_t _pace{}; uint8_t _score{}; uint8_t _score_goal{}; // For reverting the current game std::stack _actions_log; // For calculating ids of states during backtracking RelativeRepresentationData _relative_representation; // Lookup table for states. Uses the ids calculated using the relative representation std::unordered_map _position_tablebase; std::uint64_t _enumerated_states {}; }; template bool same_up_to_discard_permutation(HanabiState state1, HanabiState state2) { auto comp = [](CardMultiplicity &m1, CardMultiplicity &m2) -> bool { return m1.card.suit < m2.card.suit || (m1.card.suit == m2.card.suit and m1.card.rank < m2.card.rank) || (m1.card.suit == m2.card.suit and m1.card.rank == m2.card.rank and m1.multiplicity < m2.multiplicity); }; state1._draw_pile.sort(comp); state2._draw_pile.sort(comp); return state1 == state2; } } #include "game_state.hpp" #endif // DYNAMIC_PROGRAM_GAME_STATE_H