Endgame-Analyzer/include/game_state.h
Maximilian Keßler 5b0834bc22
bugfix: card cycling on reverting only used internally
This ensures that when doing a regular revert,
we revert to exactly the same state (i.e. also identical
ordering of draw pile by default)
and only rotate the draw pile in case we ensure internally
that we iterate over all possible draws so that the ordering
is restored in the end.
2023-11-11 12:15:54 +01:00

433 lines
15 KiB
C++

#ifndef DYNAMIC_PROGRAM_GAME_STATE_H
#define DYNAMIC_PROGRAM_GAME_STATE_H
#include <array>
#include <bitset>
#include <cstdint>
#include <limits>
#include <list>
#include <memory>
#include <optional>
#include <ostream>
#include <stack>
#include <tuple>
#include <unordered_map>
#include <vector>

#include <boost/container/static_vector.hpp>
#include <boost/rational.hpp>
namespace Hanabi {
// Small integer types used throughout the analyzer.
// rank_t / suit_t together identify a card, clue_t counts clue tokens (signed),
// player_t indexes players and hand_index_t indexes a slot within a hand.
using rank_t = std::uint8_t;
using suit_t = std::uint8_t;
using clue_t = std::int8_t;
using player_t = std::uint8_t;
using hand_index_t = std::uint8_t;
// Integer type backing the rational probability representation below.
using probability_base_type = unsigned long;
using rational_probability = boost::rational<probability_base_type>;
/**
 * Define the macro
 *   NUSE_RATIONAL_PROBABILITIES
 * to use floating-point arithmetic for the stored probabilities
 * instead of rational representations.
 * Without the macro, probability_t is a boost::rational; with it, a double.
 */
#ifndef NUSE_RATIONAL_PROBABILITIES
using probability_t = boost::rational<probability_base_type>;
#else
using probability_t = double;
#endif
// Overloads for writing a probability to a stream: one for floating-point values,
// one for rational values and one for optional probabilities of either kind.
// Only declared here; the definitions are presumably provided by game_state.hpp,
// which is included at the end of this header.
inline std::ostream& print_probability(std::ostream& os, double prob);
inline std::ostream& print_probability(std::ostream& os, const rational_probability& prob);
template<typename T>
std::ostream& print_probability(std::ostream& os, const std::optional<T>& prob);
/**
 * We will generally assume that stacks are played from n to 0.
 * Playing a 0 will yield a clue.
 * Therefore, for the default hanabi, we will play 4,3,2,1,0 in that order
 * on each stack. A stack with no cards played implicitly has value 5 on it.
 * This is just easier to implement, since then the remaining number of cards
 * to be played is always the current number of the stack.
 */
// Value of a stack on which no card has been played yet.
constexpr rank_t starting_card_rank = 5;
// Largest suit index, i.e. suits are numbered 0..5 (six entries in suit_initials).
constexpr suit_t max_suit_index = 5;
// Upper bound on the number of copies of one card; used as capacity of the
// per-card position lists in HanabiState::RelativeRepresentationData.
constexpr size_t max_card_duplicity = 3;
// Maximum number of clue tokens.
constexpr clue_t max_num_clues = 8;
// Sentinel value; NOTE(review): presumably stored in Card::local_index for cards
// that were not part of a starting hand - confirm against game_state.hpp.
constexpr uint8_t not_in_starting_hand = std::numeric_limits<uint8_t>::max();
// Sentinel hand index; NOTE(review): presumably what find_card_in_hand() reports
// when the card is not found - confirm against game_state.hpp.
constexpr hand_index_t invalid_hand_idx = std::numeric_limits<hand_index_t>::max();
// We might want to change these at runtime to adapt to other variants.
// However, a global variable is used so that the output operator for cards can read from here.
// Note that this is therefore an inline variable rather than a static one, so that it has
// external linkage and all translation units share (and can modify) the same array.
inline std::array<char, 6> suit_initials = {'r', 'y', 'g', 'b', 'p', 't'};
/**
 * A single card: suit and rank plus some bookkeeping members.
 *
 * Card is an aggregate and the member order below is the aggregate-initialization
 * order. The constants in namespace Cards only specify {suit, rank}; the remaining
 * members are then value-initialized (0 / false).
 */
struct Card {
// Index of the suit of this card.
suit_t suit;
// Rank in the internal convention where stacks are played downwards (4,3,2,1,0).
rank_t rank;
// NOTE(review): presumably an index distinguishing/locating this card in the relative
// representation of a state; the constant not_in_starting_hand looks like its sentinel - confirm.
uint8_t local_index;
// NOTE(review): presumably set for cards that were in some hand in the initial state - confirm.
bool in_starting_hand;
// NOTE(review): presumably set for cards that were already trash in the initial state - confirm.
bool initial_trash;
// Equality of cards. Only declared here; which members take part in the comparison
// is determined by the definition, which is not part of this header.
inline bool operator==(const Card &other) const;
};
namespace Cards {
  /**
   * Named constants for every card, written as <suit initial><printed rank>.
   *
   * Each constant is initialized as {suit index, internal rank}; all other members of
   * Card are value-initialized. The internal rank is 5 minus the printed rank, because
   * stacks are played downwards internally (4,3,2,1,0). The "...0" constants therefore
   * carry rank 5 == starting_card_rank, the value of a stack with nothing played on it.
   *
   * The constants are inline variables, so there is exactly one object per constant in
   * the whole program. This makes it well-defined to odr-use them (copying them, binding
   * references to them, using them as default arguments) from inline functions and
   * templates defined in headers.
   */
  // Suit 0 ('r')
  inline constexpr Card r0 = {0, 5};
  inline constexpr Card r1 = {0, 4};
  inline constexpr Card r2 = {0, 3};
  inline constexpr Card r3 = {0, 2};
  inline constexpr Card r4 = {0, 1};
  inline constexpr Card r5 = {0, 0};
  // Suit 1 ('y')
  inline constexpr Card y0 = {1, 5};
  inline constexpr Card y1 = {1, 4};
  inline constexpr Card y2 = {1, 3};
  inline constexpr Card y3 = {1, 2};
  inline constexpr Card y4 = {1, 1};
  inline constexpr Card y5 = {1, 0};
  // Suit 2 ('g')
  inline constexpr Card g0 = {2, 5};
  inline constexpr Card g1 = {2, 4};
  inline constexpr Card g2 = {2, 3};
  inline constexpr Card g3 = {2, 2};
  inline constexpr Card g4 = {2, 1};
  inline constexpr Card g5 = {2, 0};
  // Suit 3 ('b')
  inline constexpr Card b0 = {3, 5};
  inline constexpr Card b1 = {3, 4};
  inline constexpr Card b2 = {3, 3};
  inline constexpr Card b3 = {3, 2};
  inline constexpr Card b4 = {3, 1};
  inline constexpr Card b5 = {3, 0};
  // Suit 4 ('p')
  inline constexpr Card p0 = {4, 5};
  inline constexpr Card p1 = {4, 4};
  inline constexpr Card p2 = {4, 3};
  inline constexpr Card p3 = {4, 2};
  inline constexpr Card p4 = {4, 1};
  inline constexpr Card p5 = {4, 0};
  // Suit 5 ('t')
  inline constexpr Card t0 = {5, 5};
  inline constexpr Card t1 = {5, 4};
  inline constexpr Card t2 = {5, 3};
  inline constexpr Card t3 = {5, 2};
  inline constexpr Card t4 = {5, 1};
  inline constexpr Card t5 = {5, 0};
  // Placeholder cards: both use the maximal suit value, which is no valid suit index,
  // and are told apart by their rank (0 for unknown, 1 for trash).
  inline constexpr Card unknown = {std::numeric_limits<suit_t>::max(), 0};
  inline constexpr Card trash = {std::numeric_limits<suit_t>::max(), 1};
}
}
namespace std {
  /// Hash support for Hanabi::Card, so that cards can be used as keys of unordered containers.
  template<>
  struct hash<Hanabi::Card> {
    /// Combines suit and rank only; no other member of the card contributes to the hash.
    std::size_t operator()(Hanabi::Card const& card) const noexcept {
      // Each suit gets a window of six consecutive values, so cards with ranks 0..5
      // of different suits never share a hash value.
      const int suit_offset = card.suit * 6;
      return suit_offset + card.rank;
    }
  };
}
namespace Hanabi {
// Textual representation of a card, as a string and as a stream output operator.
// Only declared here. NOTE(review): the output presumably uses suit_initials for the
// suit character, as the comment on suit_initials suggests - confirm in the definition.
inline std::string to_string(const Hanabi::Card &card);
inline std::ostream &operator<<(std::ostream &os, const Card &card);
/**
 * To store:
 * - Draw pile size
 * - Distribution of cards
 * - Which cards exist?
 * - Number of clues
 */
// The play stacks: one rank_t entry per suit, holding the current value of that stack
// in the internal convention (a stack without played cards has value starting_card_rank).
template <size_t num_suits>
using Stacks = std::array<rank_t, num_suits>;
// Stream output for the play stacks; only declared here.
template <size_t num_suits>
std::ostream& operator<<(std::ostream &os, const Stacks<num_suits> &stacks);
/**
 * A card together with a count. HanabiState stores its draw pile as a list of these
 * entries instead of storing every copy of a card individually.
 */
struct CardMultiplicity {
// The card this entry stands for.
Card card;
// Number of copies of the card represented by this entry.
unsigned multiplicity;
// Member-wise equality; relies on Card::operator== for the card member.
bool operator==(const CardMultiplicity &) const = default;
};
/**
 * Selects the per-suit storage type used by CardArray:
 * a std::array<T, N> in general, and a std::bitset<N> for T = bool (see specialization).
 */
template<typename T>
struct InnerCardArray {
template<size_t N>
using array_t = std::array<T, N>;
};
// Specialization for bool: store the flags in a bitset instead of an array of bool.
template<>
struct InnerCardArray<bool> {
template<size_t N>
using array_t = std::bitset<N>;
};
/**
 * Container holding one value of type T per card, addressed by a Card.
 *
 * Storage is one inner array per suit, each with starting_card_rank entries.
 * The member functions are only declared here.
 * NOTE(review): presumably operator[] indexes by (card.suit, card.rank), which would
 * restrict valid ranks to 0 .. starting_card_rank - 1 - confirm in the definition.
 */
template <suit_t num_suits, typename T> struct CardArray {
using value_type = T;
// Value-initializes all entries (see the initializer of _array).
CardArray() = default;
// NOTE(review): presumably initializes every entry with default_val - confirm.
explicit CardArray(value_type default_val);
// NOTE(review): presumably assigns val to every entry - confirm.
void fill(value_type val);
// Access to the entry belonging to the given card.
const value_type &operator[](const Card &card) const;
value_type &operator[](const Card &card);
// Defaulted comparison of the underlying storage.
auto operator<=>(const CardArray &) const = default;
private:
// Storage for a single suit: std::array<T, ...> or std::bitset<...> for T = bool.
using inner_array_t = typename InnerCardArray<T>::template array_t<starting_card_rank>;
std::array<inner_array_t , num_suits> _array {};
};
/**
 * Kinds of actions that can occur in a game.
 * NOTE(review): the numeric values look like they mirror an external action encoding
 * (e.g. of a game log format) - confirm before changing any of them.
 */
enum class ActionType : std::uint8_t {
play = 0,
discard = 1,
// clue and color_clue intentionally share the value 2, so they compare equal.
clue = 2,
color_clue = 2,
rank_clue = 3,
end_game = 4,
vote_terminate_players = 5,
vote_terminate = 10,
};
/**
 * An action of a game: its type and the card it refers to.
 * A value-initialized Action has type ActionType::play (value 0) and a value-initialized card.
 */
struct Action {
ActionType type {};
// NOTE(review): presumably the card played or discarded; meaning for clue actions is not visible here.
Card card {};
};
// Stream output for an action; only declared here.
inline std::ostream& operator<<(std::ostream& os, const Action& action);
/** Would like to have 2 versions:
 * All:
 * - support playing cards, querying basic information
 * - support going back, but with a different interface: efficient (needs arguments, does not store) or using a stack
 *
 */
/**
 * Abstract interface of a game state, independent of the number of suits, the number
 * of players and the hand size (these are template parameters of the implementing
 * HanabiState class template). All operations are pure virtual; the notes below
 * describe the intent suggested by names and signatures and should be confirmed
 * against the implementation where marked.
 */
class HanabiStateIF {
public:
// --- Making and reverting moves ---
// Actions of the player whose turn it is; index refers to a slot in a hand.
virtual void give_clue() = 0;
virtual void discard(hand_index_t index) = 0;
virtual void play(hand_index_t index) = 0;
// NOTE(review): presumably reorders the draw pile so that the given card is drawn next - confirm.
virtual void rotate_next_draw(const Card& card) = 0;
// Type of the most recent action.
virtual ActionType last_action_type() const = 0;
// Undoes the most recent action.
virtual void revert() = 0;
// Direct manipulation of the clue count: relative change resp. absolute value.
virtual void modify_clues(clue_t change) = 0;
virtual void set_clues(clue_t clues) = 0;
// --- Queries ---
[[nodiscard]] virtual player_t turn() const = 0;
[[nodiscard]] virtual clue_t num_clues() const = 0;
[[nodiscard]] virtual std::vector<std::vector<Card>> hands() const = 0;
[[nodiscard]] virtual std::vector<Card> cur_hand() const = 0;
[[nodiscard]] virtual size_t draw_pile_size() const = 0;
[[nodiscard]] virtual bool is_trash(const Card& card) const = 0;
[[nodiscard]] virtual bool is_playable(const Card& card) const = 0;
[[nodiscard]] virtual bool is_relative_state_initialized() const = 0;
// NOTE(review): presumably yields invalid_hand_idx if the card is in no hand slot - confirm.
[[nodiscard]] virtual hand_index_t find_card_in_hand(const Card& card) const = 0;
// --- Evaluation / backtracking ---
// Counter of states enumerated so far.
[[nodiscard]] virtual std::uint64_t enumerated_states() const = 0;
// Read access to the table mapping state ids to stored probabilities.
[[nodiscard]] virtual const std::unordered_map<unsigned long, probability_t>& position_tablebase() const = 0;
// NOTE(review): presumably has to be called before evaluate_state()/unique_id() so that
// ids relative to the current state can be computed - confirm.
virtual void init_backtracking_information() = 0;
// Computes the probability value of the current state. Not const, so it may modify the object.
virtual probability_t evaluate_state() = 0;
// Stored probability for the current state, if there is one.
[[nodiscard]] virtual std::optional<probability_t> lookup() const = 0;
// Id of the current state.
[[nodiscard]] virtual std::uint64_t unique_id() const = 0;
// Diagnostic access to the components the id is built from.
[[nodiscard]] virtual std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const = 0;
// Candidate actions in the current state, each paired with a probability if one is available.
virtual std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions() = 0;
// Possible draws (card with multiplicity) after playing (play == true) or discarding
// the card at the given hand index, each paired with a probability if one is available.
virtual std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> possible_next_states(hand_index_t index, bool play) = 0;
// Virtual destructor: objects are owned and destroyed through this interface (see Game::state).
virtual ~HanabiStateIF() = default;
protected:
// Hook used by the stream output operator, which is a friend for exactly this purpose.
virtual void print(std::ostream& os) const = 0;
friend std::ostream& operator<<(std::ostream&, HanabiStateIF const&);
};
// A game mimics a game state together with a list of actions and allows to traverse the game
// history by making and reverting the stored actions.
struct Game {
  // Step one stored action forward resp. backward.
  void make_turn();
  void revert_turn();
  // Step forward/backward repeatedly. NOTE(review): the stopping conditions (turn number,
  // draw pile size threshold) are implemented elsewhere - confirm their exact meaning there.
  void forward_until(size_t turn = 100, size_t draw_pile_break = 0);
  void revert_until(size_t draw_pile_break);

  // The state the stored actions are applied to; owned through the abstract interface.
  std::unique_ptr<HanabiStateIF> state;
  // The recorded actions of the game.
  std::vector<Action> actions;
  // The deck of the game.
  std::vector<Card> deck;
  // Cursor into the stored actions (presumably the index of the next action to make).
  // Explicitly starts at 0, so that a default-initialized Game does not carry an
  // indeterminate cursor. The default member initializer keeps Game an aggregate.
  unsigned next_action { 0 };
};
// Stream output for any game state. This is the function befriended by HanabiStateIF,
// which gives it access to the protected print() hook. Only declared here.
inline std::ostream &operator<<(std::ostream &os, HanabiStateIF const &hanabi_state);
/**
 * Concrete game state for a fixed number of suits, players and cards per hand.
 *
 * Implements HanabiStateIF; all overriders are final. Besides the actual game state,
 * the object keeps a log of performed actions (for reverting) and a table of already
 * evaluated positions. The member functions are only declared here.
 */
template <suit_t num_suits, player_t num_players, hand_index_t hand_size>
class HanabiState : public HanabiStateIF {
public:
HanabiState() = default;
// Constructs a state from a deck; the score goal defaults to the maximal score 5 * num_suits.
explicit HanabiState(const std::vector<Card>& deck, uint8_t score_goal = 5 * num_suits);
// --- Making and reverting moves (see HanabiStateIF) ---
void give_clue() final;
void discard(hand_index_t index) final;
void play(hand_index_t index) final;
void rotate_next_draw(const Card& card) final;
ActionType last_action_type() const final;
void revert() final;
void modify_clues(clue_t change) final;
void set_clues(clue_t clues) final;
// --- Queries (see HanabiStateIF) ---
[[nodiscard]] player_t turn() const final;
[[nodiscard]] clue_t num_clues() const final;
[[nodiscard]] std::vector<std::vector<Card>> hands() const final;
[[nodiscard]] std::vector<Card> cur_hand() const final;
[[nodiscard]] size_t draw_pile_size() const final;
[[nodiscard]] hand_index_t find_card_in_hand(const Card& card) const final;
[[nodiscard]] bool is_trash(const Card& card) const final;
[[nodiscard]] bool is_playable(const Card& card) const final;
[[nodiscard]] bool is_relative_state_initialized() const final;
// --- Evaluation / backtracking (see HanabiStateIF) ---
[[nodiscard]] std::uint64_t enumerated_states() const final;
[[nodiscard]] const std::unordered_map<unsigned long, probability_t>& position_tablebase() const final;
void init_backtracking_information() final;
probability_t evaluate_state() final;
[[nodiscard]] std::optional<probability_t> lookup() const final;
[[nodiscard]] std::uint64_t unique_id() const final;
[[nodiscard]] std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const final;
std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions() final;
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> possible_next_states(hand_index_t index, bool play) final;
// Defaulted member-wise comparison over all data members below.
// NOTE(review): several members (action log, relative representation, tablebase) have types
// for which no comparison operators are visible in this header - confirm these operators are usable.
auto operator<=>(const HanabiState &) const = default;
protected:
void print(std::ostream& os) const final;
private:
// One entry of the action log: everything needed to undo a single action.
struct BacktrackAction {
explicit BacktrackAction(
ActionType action_type,
Card discarded_or_played = Cards::unknown,
hand_index_t index = 0,
bool was_on_8_clues = false
);
ActionType action_type{};
// The card that was discarded or played
Card discarded{};
// Index of card in hand that was discarded or played
hand_index_t index{};
// Indicates whether before the action was taken, we had 8 clues.
// This is important so that we know if we go back to 7 or 8 clues when we revert playing a 5
bool was_on_8_clues {false};
};
// This keeps track of the representation of the gamestate relative to some starting state
// and is used for id calculation
struct RelativeRepresentationData {
// List of unique non-trash cards in draw pile
boost::container::static_vector<Card, 30> good_cards_draw;
// Card positions of these cards. Indexes correspond to the cards stored in the good_cards_draw vector
boost::container::static_vector<boost::container::static_vector<player_t, max_card_duplicity>, 30> card_positions_draw;
// This will indicate whether cards that were in hands initially still are in hand
// The first n bits are used and cards are assumed to have been marked with their indices in this bitset
std::bitset<num_players * hand_size> card_positions_hands {};
// Note this is not the same as good_cards_draw.size(), since this accounts for multiplicities
std::uint8_t initial_draw_pile_size { 0 };
// Number of bits from above bitset that is meaningful
std::uint8_t num_useful_cards_in_starting_hands { 0 };
// Whether we initialized the values above and marked cards accordingly
bool initialized { false };
};
// Internal helpers; only declared here.
// NOTE(review): the unsigned long return values are presumably state ids (the same type is
// used as id parameter of update_tablebase and as key of the tablebase) - confirm.
unsigned long discard_and_potentially_update(hand_index_t index);
unsigned long play_and_potentially_update(hand_index_t index);
unsigned draw(hand_index_t index);
// Undo helpers. The cycle flag defaults to false; NOTE(review): presumably cycle == true
// rotates the draw pile on reverting, which is only intended for internal iteration over
// all possible draws - confirm in the implementation.
void revert_draw(hand_index_t index, Card discarded_card, bool cycle = false);
void revert_clue();
void revert_discard(bool cycle = false);
void revert_play(bool cycle = false);
// Stores a probability for the state with the given id.
void update_tablebase(unsigned long id, probability_t probability);
// Invokes f for the potential draws after playing/discarding the card at the given index.
template<class Function>
void do_for_each_potential_draw(hand_index_t index, bool play, Function f);
void incr_turn();
void decr_turn();
void check_draw_pile_integrity() const;
// Sentinel value; NOTE(review): presumably stored in _endgame_turns_left while the final round has not started - confirm.
static constexpr uint8_t no_endgame = std::numeric_limits<uint8_t>::max();
// Pseudo player indices just past the real players, usable as card positions
// (compare RelativeRepresentationData::card_positions_draw, which stores player_t values).
static constexpr player_t draw_pile = num_players;
static constexpr player_t trash_or_play_stack = num_players + 1;
// Usual game state
player_t _turn{};
clue_t _num_clues{};
std::uint8_t _weighted_draw_pile_size{};
Stacks<num_suits> _stacks{};
std::array<std::array<Card, hand_size>, num_players> _hands{};
std::list<CardMultiplicity> _draw_pile{};
std::uint8_t _endgame_turns_left{};
// further values of game state that are technically determined, but we update them anyway
int8_t _pace{};
uint8_t _score{};
uint8_t _score_goal{};
// For reverting the current game
std::stack<BacktrackAction> _actions_log;
// For calculating ids of states during backtracking
RelativeRepresentationData _relative_representation;
// Lookup table for states. Uses the ids calculated using the relative representation
std::unordered_map<unsigned long, probability_t> _position_tablebase;
std::uint64_t _enumerated_states {};
};
template <std::size_t num_suits, player_t num_players, std::size_t hand_size>
bool same_up_to_discard_permutation(HanabiState<num_suits, num_players, hand_size> state1, HanabiState<num_suits, num_players, hand_size> state2) {
auto comp = [](CardMultiplicity &m1, CardMultiplicity &m2) -> bool {
return m1.card.suit < m2.card.suit || (m1.card.suit == m2.card.suit and m1.card.rank < m2.card.rank) ||
(m1.card.suit == m2.card.suit and m1.card.rank == m2.card.rank and m1.multiplicity < m2.multiplicity);
};
state1._draw_pile.sort(comp);
state2._draw_pile.sort(comp);
return state1 == state2;
}
}
#include "game_state.hpp"
#endif // DYNAMIC_PROGRAM_GAME_STATE_H