#ifndef DYNAMIC_PROGRAM_GAME_STATE_H
#define DYNAMIC_PROGRAM_GAME_STATE_H

// NOTE(review): the header names of the twelve includes below are missing, and so is
// every template parameter / argument list in this file (everything that used to stand
// between '<' and '>'). The declarations are kept token-for-token as found; restore the
// missing parts from version control before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "game_interface.h"

namespace Hanabi {

// Alias for a std::array (element type and size are among the stripped template arguments).
// Presumably one entry per suit, holding the state of that suit's play stack -- TODO confirm.
template using Stacks = std::array;

// Stream output for Stacks. Declared here only; the definition is not part of this header.
template std::ostream & operator<<(std::ostream & os, const Stacks & stacks);

// Selects the container used for one row of a CardArray.
// The primary template maps array_t to std::array.
template struct InnerCardArray { template using array_t = std::array; };

// Full specialization that maps array_t to std::bitset instead.
// NOTE(review): the specialized type was stripped; presumably bool -- confirm.
template<> struct InnerCardArray { template using array_t = std::bitset; };

// Container that is indexed by a Card and stores one value_type per card.
template struct CardArray {
using value_type = T;

CardArray() = default;

// Constructs the array from a default value; the definition is not part of this header.
explicit CardArray(value_type default_val);

// Takes the value to assign; the definition is not part of this header.
void fill(value_type val);

// Element access by card (const and mutable overloads).
const value_type & operator[](const Card & card) const;
value_type & operator[](const Card & card);

//auto operator<=>(const CardArray &) const = default;

private:
// Row type chosen through InnerCardArray (std::array or std::bitset, see above).
using inner_array_t = typename InnerCardArray::template array_t;

// Backing storage, value-initialized.
std::array _array{};
};

// A game mimics a game state together with a list of actions and allows traversing the
// game history by making and reverting the stored actions.
template class HanabiState : public HanabiStateIF { public: HanabiState() = default; explicit HanabiState(const std::vector & deck, HanabiStateConfig config = HanabiStateConfig()); void give_clue() final; void discard(hand_index_t index) final; void play(hand_index_t index) final; void rotate_next_draw(const Card & card) final; ActionType last_action_type() const final; void revert() final; void modify_clues(clue_t change) final; void set_clues(clue_t clues) final; [[nodiscard]] player_t turn() const final; [[nodiscard]] clue_t num_clues() const final; [[nodiscard]] unsigned num_strikes() const final; [[nodiscard]] unsigned score() const final; [[nodiscard]] std::vector> hands() const final; [[nodiscard]] std::vector cur_hand() const final; [[nodiscard]] size_t draw_pile_size() const final; [[nodiscard]] hand_index_t find_card_in_hand(const Card & card) const final; [[nodiscard]] bool is_trash(const Card & card) const final; /** Returns whether the card is critical, assuming that it is non-trash */ [[nodiscard]] bool is_critical(const Card & card) const final; [[nodiscard]] bool is_playable(const Card & card) const final; [[nodiscard]] bool is_relative_state_initialized() const final; [[nodiscard]] std::uint64_t enumerated_states() const final; [[nodiscard]] const map_type & position_tablebase() const final; void init_backtracking_information() final; probability_t evaluate_state() final; [[nodiscard]] std::optional lookup() const final; [[nodiscard]] std::uint64_t unique_id() const final; [[nodiscard]] std::pair, std::vector> dump_unique_id_parts() const final; std::vector>> get_reasonable_actions(bool evaluate_all = false, bool reasonable = true) final; std::vector>> possible_next_states(hand_index_t index, bool play) final; //auto operator<=>(const HanabiState &) const = default; protected: void print(std::ostream & os) const final; private: struct BacktrackAction { explicit BacktrackAction( ActionType action_type , Card discarded_or_played = Cards::unknown , 
hand_index_t index = 0 , bool was_on_8_clues = false , bool strike = false ); ActionType action_type{}; // The card that was discarded or played Card discarded{}; // Index of card in hand that was discarded or played hand_index_t index{}; // Indicates whether before the action was taken, we had 8 clues. // This is important so that we know if we go back to 7 or 8 clues when we revert playing a 5 bool was_on_8_clues{false}; // Indicates whether playing this card triggered a bomb. // This cannot be deduced just from the stacks since we cannot differentiate between a card // having been played correctly or the top card of the draw pile being bombed. bool strike{false}; }; // This keeps track of the representation of the gamestate relative to some starting state // and is used for id calculation struct RelativeRepresentationData { static constexpr player_t draw_pile = num_players; static constexpr player_t discard_pile = num_players + 1; static constexpr player_t play_stack = num_players + 2; enum CardPosition : uint8_t { hand = 0, played = 1, discarded = 2 }; // List of unique non-trash cards in draw pile boost::container::static_vector good_cards_draw; // Card positions of these cards. 
Indexes correspond to the cards stored in _good_cards_draw vector boost::container::static_vector , 30> card_positions_draw; // List of all non-trash cards in hands of base state boost::container::static_vector good_cards_hands; // This will indicate whether cards that were in hands initially still are in hand // The first n entries are used and cards are assumed to have been marked with their indices in this vector boost::container::static_vector card_positions_hands{}; // Note this is not the same as _good_cards_draw.size(), since this accounts for multiplicities std::uint8_t initial_draw_pile_size{0}; // Whether we initialized the values above and marked cards accordingly bool initialized{false}; }; unsigned long discard_and_potentially_update(hand_index_t index, bool cycle = false); unsigned long play_and_potentially_update(hand_index_t index, bool cycle, bool allow_strikeout); unsigned draw(hand_index_t index, bool cycle = false, bool played = true); void revert_draw(hand_index_t index, Card discarded_card, bool cycle = false, bool played = true); void revert_clue(); void revert_discard(bool cycle = false); void revert_play(bool cycle = false); void update_tablebase(unsigned long id, std::uint64_t probability); template void do_for_each_potential_draw(hand_index_t index, bool play, Function f); void incr_turn(); void decr_turn(); void check_draw_pile_integrity() const; std::uint64_t check_play_or_discard(hand_index_t index, bool play); // For the current state, returns whether we will save it in the lookup table. // By default, this is just constant true, but we might want to trade memory for speed, i.e. // store less states, which will reduce memory consumption at the cost of re-computing some of the values // when re-visiting the states. 
bool save_state_to_map(); std::uint64_t internal_evaluate_state(); [[nodiscard]] std::optional internal_lookup() const; static constexpr uint8_t no_endgame = std::numeric_limits::max(); // Usual game state clue_t const _clues_gained_on_discard_or_stack_finished { 1 }; uint8_t const _score_goal{}; player_t _turn{}; clue_t _num_clues{}; unsigned _num_strikes{}; std::uint8_t _weighted_draw_pile_size{}; Stacks _stacks{}; std::array, num_players> _hands{}; std::list _draw_pile{}; std::uint8_t _endgame_turns_left{}; // This will actually not always be updated exactly, but only for those cards that are not // trash yet, since for trash, this is simply not interesting. // Thus, we only need to update this on discards or misplays. CardArray _num_copies_left {0}; // further values of game state that are technically determined, but we update them anyway int8_t _pace{}; uint8_t _score{}; // For reverting the current game std::stack _actions_log; // For calculating ids of states during backtracking RelativeRepresentationData _relative_representation; // Lookup table for states. Uses the ids calculated using the relative representation bool const _save_memory; map_type _position_tablebase; std::uint64_t _enumerated_states{}; }; template bool same_up_to_discard_permutation( HanabiState state1, HanabiState state2 ) { auto comp = [](CardMultiplicity & m1, CardMultiplicity & m2) -> bool { return m1.card.suit < m2.card.suit || (m1.card.suit == m2.card.suit and m1.card.rank < m2.card.rank) || (m1.card.suit == m2.card.suit and m1.card.rank == m2.card.rank and m1.multiplicity < m2.multiplicity); }; state1._draw_pile.sort(comp); state2._draw_pile.sort(comp); return state1 == state2; } } #include "game_state.hpp" #endif // DYNAMIC_PROGRAM_GAME_STATE_H