// Endgame-Analyzer/include/game_state.h
#ifndef DYNAMIC_PROGRAM_GAME_STATE_H
#define DYNAMIC_PROGRAM_GAME_STATE_H
#include <array>
#include <bitset>
#include <compare>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <list>
#include <optional>
#include <ostream>
#include <stack>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/container/static_vector.hpp>
#include <boost/rational.hpp>
2023-08-04 16:28:41 +02:00
namespace Hanabi {
// Single-byte integer aliases for the basic quantities of the game.
using suit_t = std::uint8_t;        // suit (colour) index of a card
using rank_t = std::uint8_t;        // rank of a card / current value of a stack
using player_t = std::uint8_t;      // player index
using hand_index_t = std::uint8_t;  // position of a card inside a hand
using clue_t = std::uint8_t;        // number of clues
using probability_base_type = unsigned long;
using rational_probability = boost::rational<probability_base_type>;
/**
* Define macro
* NUSE_RATIONAL_PROBABILITIES
* to use floating-point arithematic for the stored probabilities
* instead of rational representations
*/
#ifndef NUSE_RATIONAL_PROBABILITIES
using probability_t = boost::rational<probability_base_type>;
#else
using probability_t = double;
#endif
// Stream-output helpers for probabilities, overloaded for the representations used in
// this header: double, exact rational, and std::optional<T>.
// These are declarations only; the definitions are not part of this section of the file.
// Each overload takes the target stream `os` and returns a std::ostream&
// (presumably `os` itself, so calls can be chained -- confirm in the definitions).
inline std::ostream& print_probability(std::ostream& os, double prob);
inline std::ostream& print_probability(std::ostream& os, const rational_probability& prob);
template<typename T>
std::ostream& print_probability(std::ostream& os, const std::optional<T>& prob);
2023-08-04 16:28:41 +02:00
2023-08-12 19:55:10 +02:00
/**
 * Rank convention used throughout this header:
 * stacks are played downwards from n to 0, and playing a 0 yields a clue.
 * For default Hanabi the cards of a suit are therefore played in the order
 * 4,3,2,1,0, and a stack with no cards played implicitly has value 5 on it.
 * This is just easier to implement, since the remaining number of cards to be
 * played on a stack is then always the current number of that stack.
 *
 * The constants are `inline constexpr` so that all translation units including
 * this header share one definition of each.
 */
inline constexpr rank_t starting_card_rank = 5;
inline constexpr suit_t max_suit_index = 5;
inline constexpr std::size_t max_card_duplicity = 3;
inline constexpr clue_t max_num_clues = 8;
// Sentinel values: the largest value representable in the respective type.
inline constexpr std::uint8_t not_in_starting_hand = std::numeric_limits<std::uint8_t>::max();
inline constexpr hand_index_t invalid_hand_idx = std::numeric_limits<hand_index_t>::max();
2023-08-04 16:28:41 +02:00
// One-character label per suit.
// Deliberately a mutable global: it may be changed at runtime to adapt to other
// variants, and the output operator for cards reads from here.
// It is an inline (not static) variable, so it has external linkage and every
// translation unit sees the same array.
inline std::array<char, 6> suit_initials{'r', 'y', 'g', 'b', 'p', 't'};
2023-08-04 16:28:41 +02:00
/**
 * A single card.
 *
 * Card is an aggregate: brace-initialising it with only {suit, rank} (as the
 * named constants in namespace Cards do) value-initialises the remaining
 * members, i.e. local_index = 0 and both flags = false. A default-initialised
 * Card (`Card c;`) leaves all members indeterminate.
 */
struct Card {
  suit_t suit;            // suit index
  rank_t rank;            // rank in the downward convention (the next card to play has the lowest remaining rank)
  std::uint8_t local_index;
  bool in_starting_hand;
  bool initial_trash;

  // Equality of two cards. Declared here, defined out of line (not visible in
  // this part of the file), so which members take part cannot be told from here.
  inline bool operator==(const Card &other) const;
};
/**
 * Named constants for the cards of the six suits r, y, g, b, p, t
 * (suit indices 0..5).
 *
 * The digit d in a name maps to the stored rank 5 - d, matching the convention
 * that stacks are played downwards: e.g. r1 is {suit 0, rank 4} and r5 is
 * {suit 0, rank 0}. Only suit and rank are given; all other Card members are
 * value-initialised.
 *
 * `unknown` and `trash` are special markers that use the largest suit_t value
 * as their suit and are told apart by rank (0 and 1 respectively).
 */
namespace Cards {
  inline constexpr Card r0 = {0, 5};
  inline constexpr Card r1 = {0, 4};
  inline constexpr Card r2 = {0, 3};
  inline constexpr Card r3 = {0, 2};
  inline constexpr Card r4 = {0, 1};
  inline constexpr Card r5 = {0, 0};

  inline constexpr Card y0 = {1, 5};
  inline constexpr Card y1 = {1, 4};
  inline constexpr Card y2 = {1, 3};
  inline constexpr Card y3 = {1, 2};
  inline constexpr Card y4 = {1, 1};
  inline constexpr Card y5 = {1, 0};

  inline constexpr Card g0 = {2, 5};
  inline constexpr Card g1 = {2, 4};
  inline constexpr Card g2 = {2, 3};
  inline constexpr Card g3 = {2, 2};
  inline constexpr Card g4 = {2, 1};
  inline constexpr Card g5 = {2, 0};

  inline constexpr Card b0 = {3, 5};
  inline constexpr Card b1 = {3, 4};
  inline constexpr Card b2 = {3, 3};
  inline constexpr Card b3 = {3, 2};
  inline constexpr Card b4 = {3, 1};
  inline constexpr Card b5 = {3, 0};

  inline constexpr Card p0 = {4, 5};
  inline constexpr Card p1 = {4, 4};
  inline constexpr Card p2 = {4, 3};
  inline constexpr Card p3 = {4, 2};
  inline constexpr Card p4 = {4, 1};
  inline constexpr Card p5 = {4, 0};

  inline constexpr Card t0 = {5, 5};
  inline constexpr Card t1 = {5, 4};
  inline constexpr Card t2 = {5, 3};
  inline constexpr Card t3 = {5, 2};
  inline constexpr Card t4 = {5, 1};
  inline constexpr Card t5 = {5, 0};

  inline constexpr Card unknown = {std::numeric_limits<suit_t>::max(), 0};
  inline constexpr Card trash = {std::numeric_limits<suit_t>::max(), 1};
}
}
namespace std {
  // Hash support so Hanabi::Card can serve as a key in unordered containers.
  template<>
  struct hash<Hanabi::Card> {
    // The hash is suit * 6 + rank; no other member of the card contributes.
    std::size_t operator()(Hanabi::Card const& card) const noexcept {
      constexpr int rank_slots_per_suit = 6;
      const int combined = card.suit * rank_slots_per_suit + card.rank;
      return static_cast<std::size_t>(combined);
    }
  };
}
namespace Hanabi {
2023-08-04 16:28:41 +02:00
inline std::string to_string(const Hanabi::Card &card);
2023-08-12 00:04:02 +02:00
inline std::ostream &operator<<(std::ostream &os, const Card &card);
2023-08-04 16:28:41 +02:00
/**
* To store:
* - Draw pile size
* - Distribution of cards
* - Which cards exist?
* - Number of clues
*/
2023-08-07 10:45:11 +02:00
// The play stacks: one rank_t value per suit, stored in a fixed-size array.
template <std::size_t num_suits>
using Stacks = std::array<rank_t, num_suits>;
2023-08-04 16:28:41 +02:00
2023-08-07 10:45:11 +02:00
// Stream output for a set of stacks.
// Declaration only; the definition is not visible in this part of the file.
// NOTE(review): Stacks is an alias of std::array, whose only associated namespace is std,
// so argument-dependent lookup will not find this overload from code outside namespace
// Hanabi -- confirm that all call sites are inside the namespace.
template <size_t num_suits>
std::ostream& operator<<(std::ostream &os, const Stacks<num_suits> &stacks);
2023-08-04 16:28:41 +02:00
struct CardMultiplicity {
Card card;
2023-08-12 00:04:02 +02:00
unsigned multiplicity;
2023-08-04 16:28:41 +02:00
2023-08-12 19:55:10 +02:00
bool operator==(const CardMultiplicity &) const = default;
2023-08-04 16:28:41 +02:00
};
2023-08-07 01:51:24 +02:00
// Chooses the container used for one row of a CardArray:
// a std::array of the value type in general ...
template<class Value>
struct InnerCardArray {
  template<std::size_t Size>
  using array_t = std::array<Value, Size>;
};

// ... and a std::bitset when the value type is bool.
template<>
struct InnerCardArray<bool> {
  template<std::size_t Size>
  using array_t = std::bitset<Size>;
};
template <suit_t num_suits, typename T> struct CardArray {
using value_type = T;
2023-08-04 16:28:41 +02:00
CardArray() = default;
explicit CardArray(value_type default_val);
2023-08-04 16:28:41 +02:00
2023-08-07 01:51:24 +02:00
void fill(value_type val);
const value_type &operator[](const Card &card) const;
2023-08-05 12:19:34 +02:00
value_type &operator[](const Card &card);
auto operator<=>(const CardArray &) const = default;
2023-08-04 16:28:41 +02:00
private:
2023-08-07 01:51:24 +02:00
using inner_array_t = typename InnerCardArray<T>::template array_t<starting_card_rank>;
std::array<inner_array_t , num_suits> _array {};
2023-08-04 16:28:41 +02:00
};
2023-08-12 19:55:10 +02:00
// Kinds of actions, stored in a single byte.
// `clue` and `color_clue` intentionally share the value 2; the numeric values
// are kept exactly as they were since other code may depend on them.
enum class ActionType : std::uint8_t {
  play = 0,
  discard = 1,
  clue = 2,
  color_clue = 2,
  rank_clue = 3,
  end_game = 4,
  vote_terminate_players = 5,
  vote_terminate = 10,
};
2023-08-04 16:28:41 +02:00
2023-08-11 16:54:11 +02:00
struct Action {
2023-08-11 15:41:03 +02:00
ActionType type {};
Card card {};
};
2023-08-04 16:28:41 +02:00
2023-08-12 00:04:02 +02:00
// Stream output for an Action.
// Declaration only; the definition is not visible in this part of the file.
inline std::ostream& operator<<(std::ostream& os, const Action& action);
2023-08-11 16:54:11 +02:00
/** Would like to have 2 versions:
* All:
* - support playing cards, querying basic information
* - support going back, but with a different interface: efficient (needs arguments, does not store) or using a stack
*
*/
class HanabiStateIF {
public:
virtual void give_clue() = 0;
virtual void discard(hand_index_t index) = 0;
virtual void play(hand_index_t index) = 0;
virtual void rotate_next_draw(const Card& card) = 0;
virtual void revert() = 0;
[[nodiscard]] virtual player_t turn() const = 0;
[[nodiscard]] virtual clue_t num_clues() const = 0;
[[nodiscard]] virtual std::vector<std::vector<Card>> hands() const = 0;
[[nodiscard]] virtual std::vector<Card> cur_hand() const = 0;
[[nodiscard]] virtual size_t draw_pile_size() const = 0;
[[nodiscard]] virtual bool is_trash(const Card& card) const = 0;
[[nodiscard]] virtual bool is_playable(const Card& card) const = 0;
[[nodiscard]] virtual bool is_relative_state_initialized() const = 0;
[[nodiscard]] virtual hand_index_t find_card_in_hand(const Card& card) const = 0;
[[nodiscard]] virtual std::uint64_t enumerated_states() const = 0;
[[nodiscard]] virtual const std::unordered_map<unsigned long, probability_t>& position_tablebase() const = 0;
virtual void init_backtracking_information() = 0;
virtual probability_t evaluate_state() = 0;
[[nodiscard]] virtual std::optional<probability_t> lookup() const = 0;
[[nodiscard]] virtual std::uint64_t unique_id() const = 0;
2023-08-12 18:48:01 +02:00
[[nodiscard]] virtual std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const = 0;
2023-08-11 16:54:11 +02:00
virtual std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions() = 0;
2023-08-12 00:04:02 +02:00
virtual std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> possible_next_states(hand_index_t index, bool play) = 0;
2023-08-11 16:54:11 +02:00
virtual ~HanabiStateIF() = default;
protected:
virtual void print(std::ostream& os) const = 0;
friend std::ostream& operator<<(std::ostream&, HanabiStateIF const&);
};
2023-08-12 00:04:02 +02:00
// Stream output for any game state through its interface.
// Declaration only; this is the operator that HanabiStateIF declares as a friend,
// which gives it access to the protected print() member.
inline std::ostream &operator<<(std::ostream &os, HanabiStateIF const &hanabi_state);
template <suit_t num_suits, player_t num_players, hand_index_t hand_size>
class HanabiState : public HanabiStateIF {
2023-08-04 16:28:41 +02:00
public:
2023-08-05 12:19:34 +02:00
HanabiState() = default;
explicit HanabiState(const std::vector<Card>& deck);
void give_clue() final;
void discard(hand_index_t index) final;
void play(hand_index_t index) final;
2023-08-05 12:19:34 +02:00
void rotate_next_draw(const Card& card) final;
void revert() final;
2023-08-05 12:19:34 +02:00
[[nodiscard]] player_t turn() const final;
[[nodiscard]] clue_t num_clues() const final;
[[nodiscard]] std::vector<std::vector<Card>> hands() const final;
[[nodiscard]] std::vector<Card> cur_hand() const final;
[[nodiscard]] size_t draw_pile_size() const final;
[[nodiscard]] hand_index_t find_card_in_hand(const Card& card) const final;
[[nodiscard]] bool is_trash(const Card& card) const final;
[[nodiscard]] bool is_playable(const Card& card) const final;
[[nodiscard]] bool is_relative_state_initialized() const final;
2023-08-05 12:19:34 +02:00
[[nodiscard]] std::uint64_t enumerated_states() const final;
[[nodiscard]] const std::unordered_map<unsigned long, probability_t>& position_tablebase() const final;
void init_backtracking_information() final;
2023-08-11 15:41:03 +02:00
probability_t evaluate_state() final;
2023-08-12 18:48:01 +02:00
[[nodiscard]] std::optional<probability_t> lookup() const final;
[[nodiscard]] std::uint64_t unique_id() const final;
[[nodiscard]] std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const final;
2023-08-11 15:41:03 +02:00
2023-08-11 16:54:11 +02:00
std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions() final;
2023-08-12 00:04:02 +02:00
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>> possible_next_states(hand_index_t index, bool play) final;
auto operator<=>(const HanabiState &) const = default;
2023-08-05 12:19:34 +02:00
protected:
void print(std::ostream& os) const final;
2023-08-04 16:28:41 +02:00
private:
struct BacktrackAction {
explicit BacktrackAction(
ActionType action_type,
Card discarded_or_played = Cards::unknown,
hand_index_t index = 0,
bool was_on_8_clues = false
);
ActionType action_type{};
// The card that was discarded or played
Card discarded{};
// Index of card in hand that was discarded or played
hand_index_t index{};
// Indicates whether before the action was taken, we had 8 clues.
// This is important so that we know if we go back to 7 or 8 clues when we revert playing a 5
bool was_on_8_clues {false};
};
2023-08-11 13:47:57 +02:00
// This keeps track of the representation of the gamestate relative to some starting state
// and is used for id calculation
struct RelativeRepresentationData {
// List of unique non-trash cards in draw pile
boost::container::static_vector<Card, 30> good_cards_draw;
// Card positions of these cards. Indexes correspond to the cards stored in _good_cards_draw vector
boost::container::static_vector<boost::container::static_vector<player_t, max_card_duplicity>, 30> card_positions_draw;
// This will indicate whether cards that were in hands initially still are in hand
// The first n bits are used and cards are assumed to have been marked with their indices in this bitset
std::bitset<num_players * hand_size> card_positions_hands {};
2023-08-12 19:55:10 +02:00
// Note this is not the same as _good_cards_draw.size(), since this accounts for multiplicities
std::uint8_t initial_draw_pile_size { 0 };
2023-08-11 13:47:57 +02:00
// Number of bits from above bitset that is meaningful
2023-08-12 19:55:10 +02:00
std::uint8_t num_useful_cards_in_starting_hands { 0 };
2023-08-11 13:47:57 +02:00
// Whether we initialized the values above and marked cards accordingly
bool initialized { false };
};
unsigned long discard_and_potentially_update(hand_index_t index);
unsigned long play_and_potentially_update(hand_index_t index);
2023-08-12 00:04:02 +02:00
unsigned draw(hand_index_t index);
2023-08-07 00:06:50 +02:00
void revert_draw(hand_index_t index, Card discarded_card);
void revert_clue();
void revert_discard();
void revert_play();
2023-08-05 12:19:34 +02:00
void update_tablebase(unsigned long id, probability_t probability);
template<class Function>
void do_for_each_potential_draw(hand_index_t index, bool play, Function f);
2023-08-05 12:19:34 +02:00
void incr_turn();
void decr_turn();
static constexpr uint8_t no_endgame = std::numeric_limits<uint8_t>::max();
static constexpr player_t draw_pile = num_players;
static constexpr player_t trash_or_play_stack = num_players + 1;
2023-08-11 13:47:57 +02:00
// Usual game state
2023-08-05 12:19:34 +02:00
player_t _turn{};
clue_t _num_clues{};
2023-08-06 11:54:57 +02:00
std::uint8_t _weighted_draw_pile_size{};
2023-08-05 12:19:34 +02:00
Stacks<num_suits> _stacks{};
std::array<std::array<Card, hand_size>, num_players> _hands{};
2023-08-05 12:19:34 +02:00
std::list<CardMultiplicity> _draw_pile{};
std::uint8_t _endgame_turns_left{};
2023-08-05 12:19:34 +02:00
2023-08-11 13:47:57 +02:00
// further values of game state that are technically determined, but we update them anyway
int8_t _pace{};
2023-08-06 11:54:57 +02:00
uint8_t _score{};
2023-08-05 12:19:34 +02:00
2023-08-11 13:47:57 +02:00
// For reverting the current game
std::stack<BacktrackAction> _actions_log;
2023-08-11 13:47:57 +02:00
// For calculating ids of states during backtracking
RelativeRepresentationData _relative_representation;
// Lookup table for states. Uses the ids calculated using the relative representation
2023-08-10 11:14:15 +02:00
std::unordered_map<unsigned long, probability_t> _position_tablebase;
2023-08-11 13:47:57 +02:00
std::uint64_t _enumerated_states {};
2023-08-05 12:19:34 +02:00
};
2023-08-04 16:28:41 +02:00
2023-08-06 14:06:41 +02:00
template <std::size_t num_suits, player_t num_players, std::size_t hand_size>
bool same_up_to_discard_permutation(HanabiState<num_suits, num_players, hand_size> state1, HanabiState<num_suits, num_players, hand_size> state2) {
auto comp = [](CardMultiplicity &m1, CardMultiplicity &m2) -> bool {
return m1.card.suit < m2.card.suit || (m1.card.suit == m2.card.suit and m1.card.rank < m2.card.rank) ||
(m1.card.suit == m2.card.suit and m1.card.rank == m2.card.rank and m1.multiplicity < m2.multiplicity);
};
state1._draw_pile.sort(comp);
state2._draw_pile.sort(comp);
return state1 == state2;
}
2023-08-04 16:28:41 +02:00
}
2023-08-05 11:55:46 +02:00
#include "game_state.hpp"
#endif // DYNAMIC_PROGRAM_GAME_STATE_H