Endgame-Analyzer/include/game_state.h
Maximilian Keßler 5c4a2bb4f7
Store rationals without denominator
Instead of storing a rational for every game state,
we just store how many of the factorial(draw pile size)
possible game states can be won.
This allows us to save only one 64-bit integer per game state instead of
two and thus reduces memory consumption of the program significantly.
Also, this makes some computations a bit easier, since we do not have to
normalize when recursing - we can just add the number of winnable states
for each possible draw.

On the other hand, this means that upon lookup, we have to normalize the
stored values again to retrieve the probabilities.
In particular, one needs to know what the draw pile size of the game
state is in order to interpret the value of the state.
2024-02-09 15:58:15 +01:00

295 lines
9.5 KiB
C++

#ifndef DYNAMIC_PROGRAM_GAME_STATE_H
#define DYNAMIC_PROGRAM_GAME_STATE_H
#include <array>
#include <bitset>
#include <cstdint>
#include <limits>
#include <list>
#include <optional>
#include <ostream>
#include <stack>
#include <vector>
#include <memory>
#include <boost/container/static_vector.hpp>
#include <boost/rational.hpp>
#include "game_interface.h"
namespace Hanabi
{
// Fixed-size array holding one rank_t per suit.
// NOTE(review): presumably the rank reached so far on each suit's play stack -- confirm against usage.
template<size_t num_suits>
using Stacks = std::array<rank_t, num_suits>;
// Stream output for Stacks. Only declared here.
// NOTE(review): the definition is presumably provided by game_state.hpp, which is included at the
// end of this header -- confirm.
template<size_t num_suits>
std::ostream & operator<<(std::ostream & os, const Stacks<num_suits> & stacks);
// Trait selecting the fixed-size container used to hold `Length` values of type T.
// Primary template: a plain std::array<T, Length>.
template<typename T>
struct InnerCardArray
{
  template<std::size_t Length>
  using array_t = std::array<T, Length>;
};

// Specialization for bool: the values are stored in a std::bitset<Length>
// instead of an array of bool.
template<>
struct InnerCardArray<bool>
{
  template<std::size_t Length>
  using array_t = std::bitset<Length>;
};
// Stores one value of type T per card, addressed by a Card.
// The storage consists of num_suits inner arrays, each with starting_card_rank entries.
// For T = bool the inner array is a std::bitset (see InnerCardArray<bool>), otherwise a std::array.
// The member functions are only declared here; their definitions are not visible in this header.
template<suit_t num_suits, typename T>
struct CardArray
{
using value_type = T;
// Leaves the storage value-initialized (see the `{}` initializer on _array).
CardArray() = default;
// NOTE(review): presumably initializes every entry with default_val -- definition not visible here.
explicit CardArray(value_type default_val);
// NOTE(review): presumably assigns val to every entry -- definition not visible here.
void fill(value_type val);
// Element access by card.
// NOTE(review): for T = bool the inner storage is a std::bitset, whose operator[] yields a bool by
// value (const) or a proxy object (non-const) rather than a bool&. Confirm that these
// reference-returning accessors are never instantiated for T = bool.
const value_type & operator[](const Card & card) const;
value_type & operator[](const Card & card);
//auto operator<=>(const CardArray &) const = default;
private:
// Per-suit container type, chosen by InnerCardArray depending on T.
using inner_array_t = typename InnerCardArray<T>::template array_t<starting_card_rank>;
// One inner array per suit.
std::array<inner_array_t, num_suits> _array{};
};
// A HanabiState models a game state together with a log of the actions taken so far, and allows
// traversing the game history by applying actions and reverting the logged ones.
//
// Template parameters:
//   num_suits   - number of suits; sizes the stacks and the per-card arrays
//   num_players - number of players; sizes the array of hands
//   hand_size   - number of card slots in each hand
//
// The public and protected member functions are `final` overrides of the HanabiStateIF interface.
// NOTE(review): member function definitions are presumably provided by game_state.hpp, which is
// included at the end of this header.
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
class HanabiState : public HanabiStateIF
{
public:
  // Usable because every const data member carries a default member initializer.
  HanabiState() = default;

  explicit HanabiState(const std::vector<Card> & deck, HanabiStateConfig config = HanabiStateConfig());

  // --- Actions that advance or modify the state ---

  void give_clue() final;

  void discard(hand_index_t index) final;

  void play(hand_index_t index) final;

  void rotate_next_draw(const Card & card) final;

  ActionType last_action_type() const final;

  void revert() final;

  void modify_clues(clue_t change) final;

  void set_clues(clue_t clues) final;

  // --- Read-only queries ---

  [[nodiscard]] player_t turn() const final;

  [[nodiscard]] clue_t num_clues() const final;

  [[nodiscard]] unsigned num_strikes() const final;

  [[nodiscard]] unsigned score() const final;

  [[nodiscard]] std::vector<std::vector<Card>> hands() const final;

  [[nodiscard]] std::vector<Card> cur_hand() const final;

  [[nodiscard]] size_t draw_pile_size() const final;

  [[nodiscard]] hand_index_t find_card_in_hand(const Card & card) const final;

  [[nodiscard]] bool is_trash(const Card & card) const final;

  /** Returns whether the card is critical, assuming that it is non-trash */
  [[nodiscard]] bool is_critical(const Card & card) const final;

  [[nodiscard]] bool is_playable(const Card & card) const final;

  [[nodiscard]] bool is_relative_state_initialized() const final;

  // --- Backtracking / evaluation ---

  [[nodiscard]] std::uint64_t enumerated_states() const final;

  // Read access to the lookup table of evaluated states. The mapped values are raw
  // std::uint64_t numbers, not probability_t.
  // NOTE(review): presumably the number of winnable draw orders of the state, which has to be
  // normalized by factorial(draw pile size) to obtain a probability -- confirm.
  [[nodiscard]] const map_type<unsigned long, std::uint64_t> & position_tablebase() const final;

  void init_backtracking_information() final;

  probability_t evaluate_state() final;

  [[nodiscard]] std::optional<probability_t> lookup() const final;

  [[nodiscard]] std::uint64_t unique_id() const final;

  [[nodiscard]] std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const final;

  std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions(bool evaluate_all = false, bool reasonable = true) final;

  std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
  possible_next_states(hand_index_t index, bool play) final;

  //auto operator<=>(const HanabiState &) const = default;

protected:
  void print(std::ostream & os) const final;

private:
  // One entry of the action log: everything needed to undo a single action.
  struct BacktrackAction
  {
    explicit BacktrackAction(
      ActionType action_type
      , Card discarded_or_played = Cards::unknown
      , hand_index_t index = 0
      , bool was_on_8_clues = false
      , bool strike = false
    );

    ActionType action_type{};
    // The card that was discarded or played
    Card discarded{};
    // Index of card in hand that was discarded or played
    hand_index_t index{};
    // Indicates whether before the action was taken, we had 8 clues.
    // This is important so that we know if we go back to 7 or 8 clues when we revert playing a 5
    bool was_on_8_clues{false};
    // Indicates whether playing this card triggered a bomb.
    // This cannot be deduced just from the stacks since we cannot differentiate between a card
    // having been played correctly or the top card of the draw pile being bombed.
    bool strike{false};
  };

  // This keeps track of the representation of the gamestate relative to some starting state
  // and is used for id calculation
  struct RelativeRepresentationData
  {
    // Pseudo player indices directly following the real players (0 .. num_players - 1).
    // NOTE(review): presumably used as location markers in card_positions_draw -- confirm.
    static constexpr player_t draw_pile = num_players;
    static constexpr player_t discard_pile = num_players + 1;
    static constexpr player_t play_stack = num_players + 2;

    enum CardPosition : uint8_t
    {
      hand = 0, played = 1, discarded = 2
    };

    // List of unique non-trash cards in draw pile
    boost::container::static_vector<Card, 30> good_cards_draw;
    // Card positions of these cards. Indexes correspond to the cards stored in the good_cards_draw vector
    boost::container::static_vector<boost::container::static_vector<player_t, max_card_duplicity>
                                    , 30> card_positions_draw;
    // List of all non-trash cards in hands of base state
    boost::container::static_vector<Card, num_players * hand_size> good_cards_hands;
    // This will indicate whether cards that were in hands initially still are in hand
    // The first n entries are used and cards are assumed to have been marked with their indices in this vector
    boost::container::static_vector<CardPosition, num_players * hand_size> card_positions_hands{};
    // Note this is not the same as good_cards_draw.size(), since this accounts for multiplicities
    std::uint8_t initial_draw_pile_size{0};
    // Whether we initialized the values above and marked cards accordingly
    bool initialized{false};
  };

  // Internal building blocks of the public actions and of their reverts.
  // NOTE(review): the exact meaning of the `cycle` and `played` flags and of the returned values is
  // defined by the out-of-line implementations, which are not visible in this header.
  unsigned long discard_and_potentially_update(hand_index_t index, bool cycle = false);

  unsigned long play_and_potentially_update(hand_index_t index, bool cycle = false);

  unsigned draw(hand_index_t index, bool cycle = false, bool played = true);

  void revert_draw(hand_index_t index, Card discarded_card, bool cycle = false, bool played = true);

  void revert_clue();

  void revert_discard(bool cycle = false);

  void revert_play(bool cycle = false);

  // NOTE(review): despite its name, `probability` is a raw std::uint64_t, matching the mapped type
  // of _position_tablebase; presumably a count of winnable states, not a normalized probability.
  void update_tablebase(unsigned long id, std::uint64_t probability);

  template<class Function>
  void do_for_each_potential_draw(hand_index_t index, bool play, Function f);

  void incr_turn();

  void decr_turn();

  void check_draw_pile_integrity() const;

  std::uint64_t check_play_or_discard(hand_index_t index, bool play);

  // For the current state, returns whether we will save it in the lookup table.
  // By default, this is just constant true, but we might want to trade memory for speed, i.e.
  // store less states, which will reduce memory consumption at the cost of re-computing some of the values
  // when re-visiting the states.
  bool save_state_to_map();

  // Raw-value counterparts of evaluate_state() and lookup(): they return std::uint64_t instead of
  // probability_t.
  std::uint64_t internal_evaluate_state();

  [[nodiscard]] std::optional<std::uint64_t> internal_lookup() const;

  // Largest uint8_t value.
  // NOTE(review): presumably the sentinel stored in _endgame_turns_left while the endgame has not
  // started -- confirm.
  static constexpr uint8_t no_endgame = std::numeric_limits<uint8_t>::max();

  // Usual game state
  clue_t const _clues_gained_on_discard_or_stack_finished { 1 };
  uint8_t const _score_goal{};
  player_t _turn{};
  clue_t _num_clues{};
  unsigned _num_strikes{};
  std::uint8_t _weighted_draw_pile_size{};
  Stacks<num_suits> _stacks{};
  std::array<std::array<Card, hand_size>, num_players> _hands{};
  std::list<CardMultiplicity> _draw_pile{};
  std::uint8_t _endgame_turns_left{};

  // This will actually not always be updated exactly, but only for those cards that are not
  // trash yet, since for trash, this is simply not interesting.
  // Thus, we only need to update this on discards or misplays.
  CardArray<num_suits, int8_t> _num_copies_left {0};

  // further values of game state that are technically determined, but we update them anyway
  int8_t _pace{};
  uint8_t _score{};

  // For reverting the current game
  std::stack<BacktrackAction> _actions_log;

  // For calculating ids of states during backtracking
  RelativeRepresentationData _relative_representation;

  // Whether to store fewer states in the lookup table (see save_state_to_map()).
  // Value-initialized (false) by default, like the other const members: a const member without a
  // default member initializer would make the defaulted default constructor implicitly deleted.
  bool const _save_memory{};

  // Lookup table for states. Uses the ids calculated using the relative representation
  // NOTE(review): keys are `unsigned long` while unique_id() returns std::uint64_t; these types
  // differ in width on platforms where `unsigned long` is 32 bits.
  map_type<unsigned long, std::uint64_t> _position_tablebase;

  std::uint64_t _enumerated_states{};
};
/**
 * Checks whether two states compare equal once the order of their draw piles is ignored.
 *
 * Both states are taken by value: their draw piles are sorted in place before the comparison,
 * so the caller's objects stay untouched.
 *
 * The non-type template parameters are declared `auto` so that they are always deduced with
 * exactly the types HanabiState declares them with (suit_t, player_t, hand_index_t). Declaring
 * them with other types (e.g. std::size_t) makes template argument deduction fail whenever
 * those types are not identical to the ones used by the class template.
 *
 * NOTE(review): this function reads the private member HanabiState::_draw_pile and compares
 * states with operator==. The class as declared in this header neither befriends this function
 * nor declares a comparison operator (its operator<=> is commented out), so both are needed
 * before this template can be instantiated.
 *
 * @param state1 first state (copied)
 * @param state2 second state (copied)
 * @return whether the two copies are equal after sorting their draw piles
 */
template<auto num_suits, auto num_players, auto hand_size>
bool same_up_to_discard_permutation(
  HanabiState<num_suits, num_players, hand_size> state1
  , HanabiState<num_suits, num_players, hand_size> state2
)
{
  // Strict lexicographic order on (suit, rank, multiplicity).
  // The arguments are taken by const reference: a comparator must not modify the elements.
  const auto draw_pile_order = [](const CardMultiplicity & lhs, const CardMultiplicity & rhs) -> bool {
    return lhs.card.suit < rhs.card.suit or
           (lhs.card.suit == rhs.card.suit and
            (lhs.card.rank < rhs.card.rank or
             (lhs.card.rank == rhs.card.rank and lhs.multiplicity < rhs.multiplicity)));
  };

  state1._draw_pile.sort(draw_pile_order);
  state2._draw_pile.sort(draw_pile_order);

  return state1 == state2;
}
}
#include "game_state.hpp"
#endif // DYNAMIC_PROGRAM_GAME_STATE_H