2023-08-04 16:28:41 +02:00
|
|
|
#ifndef DYNAMIC_PROGRAM_GAME_STATE_H
|
|
|
|
#define DYNAMIC_PROGRAM_GAME_STATE_H
|
|
|
|
|
|
|
|
#include <array>
|
|
|
|
#include <bitset>
|
2023-08-12 19:43:22 +02:00
|
|
|
#include <cstdint>
|
2023-08-04 16:28:41 +02:00
|
|
|
#include <limits>
|
|
|
|
#include <list>
|
2023-08-12 19:43:22 +02:00
|
|
|
#include <optional>
|
2023-08-04 16:28:41 +02:00
|
|
|
#include <ostream>
|
2023-08-12 19:43:22 +02:00
|
|
|
#include <stack>
|
|
|
|
#include <unordered_map>
|
|
|
|
#include <vector>
|
2023-11-10 20:35:05 +01:00
|
|
|
#include <memory>
|
2023-08-12 19:43:22 +02:00
|
|
|
|
|
|
|
#include <boost/container/static_vector.hpp>
|
2023-08-10 11:14:15 +02:00
|
|
|
#include <boost/rational.hpp>
|
2023-08-04 16:28:41 +02:00
|
|
|
|
2023-11-15 22:58:09 +01:00
|
|
|
#include "game_interface.h"
|
2023-08-04 16:28:41 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
namespace Hanabi
|
|
|
|
{
|
2023-08-05 00:34:31 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
template<size_t num_suits>
|
|
|
|
using Stacks = std::array<rank_t, num_suits>;
|
2023-08-04 16:28:41 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
template<size_t num_suits>
|
|
|
|
std::ostream & operator<<(std::ostream & os, const Stacks<num_suits> & stacks);
|
2023-08-04 16:28:41 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
/// Selects the container type used for one row of a CardArray.
/// For a generic element type T, a row of N elements is a std::array<T, N>.
template<typename T>
struct InnerCardArray
{
  template<std::size_t N>
  using array_t = std::array<T, N>;
};

/// Specialization for bool: a row of N flags is stored as a std::bitset<N>
/// instead of an array of bool.
template<>
struct InnerCardArray<bool>
{
  template<std::size_t N>
  using array_t = std::bitset<N>;
};
|
2023-08-07 01:51:24 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
template<suit_t num_suits, typename T>
|
|
|
|
struct CardArray
|
|
|
|
{
|
2023-08-06 10:23:29 +02:00
|
|
|
using value_type = T;
|
2023-08-04 16:28:41 +02:00
|
|
|
|
2023-08-06 10:23:29 +02:00
|
|
|
CardArray() = default;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-06 10:23:29 +02:00
|
|
|
explicit CardArray(value_type default_val);
|
2023-08-04 16:28:41 +02:00
|
|
|
|
2023-08-07 01:51:24 +02:00
|
|
|
void fill(value_type val);
|
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
const value_type & operator[](const Card & card) const;
|
2023-08-05 12:19:34 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
value_type & operator[](const Card & card);
|
2023-08-06 10:23:29 +02:00
|
|
|
|
|
|
|
auto operator<=>(const CardArray &) const = default;
|
2023-08-04 16:28:41 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
private:
|
2023-08-07 01:51:24 +02:00
|
|
|
using inner_array_t = typename InnerCardArray<T>::template array_t<starting_card_rank>;
|
2023-11-16 16:20:04 +01:00
|
|
|
std::array<inner_array_t, num_suits> _array{};
|
|
|
|
};
|
2023-08-04 16:28:41 +02:00
|
|
|
|
2023-08-06 22:06:58 +02:00
|
|
|
|
2023-11-10 20:35:05 +01:00
|
|
|
// A game mimics a game state together with a list of actions and allows to traverse the game
|
|
|
|
// history by making and reverting the stored actions.
|
2023-11-16 16:20:04 +01:00
|
|
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
|
|
|
class HanabiState : public HanabiStateIF
|
|
|
|
{
|
|
|
|
public:
|
2023-08-05 12:19:34 +02:00
|
|
|
HanabiState() = default;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2024-01-09 00:36:32 +01:00
|
|
|
explicit HanabiState(const std::vector<Card> & deck, uint8_t score_goal = 5 * num_suits, clue_t num_clues_gained_on_discard_or_stack_finished = 1);
|
2023-08-05 12:19:34 +02:00
|
|
|
|
2023-08-12 08:50:28 +02:00
|
|
|
void give_clue() final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-10 12:06:13 +02:00
|
|
|
void discard(hand_index_t index) final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-11 14:24:17 +02:00
|
|
|
void play(hand_index_t index) final;
|
2023-08-05 12:19:34 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
void rotate_next_draw(const Card & card) final;
|
|
|
|
|
2023-11-11 11:44:06 +01:00
|
|
|
ActionType last_action_type() const final;
|
2023-08-11 18:28:12 +02:00
|
|
|
|
2023-08-11 14:24:17 +02:00
|
|
|
void revert() final;
|
2023-08-05 12:19:34 +02:00
|
|
|
|
2023-11-10 23:53:28 +01:00
|
|
|
void modify_clues(clue_t change) final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-11-11 01:16:25 +01:00
|
|
|
void set_clues(clue_t clues) final;
|
2023-11-10 23:53:28 +01:00
|
|
|
|
2023-08-12 08:50:28 +02:00
|
|
|
[[nodiscard]] player_t turn() const final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-12 08:50:28 +02:00
|
|
|
[[nodiscard]] clue_t num_clues() const final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-11-12 18:32:19 +01:00
|
|
|
[[nodiscard]] unsigned score() const final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-12 08:50:28 +02:00
|
|
|
[[nodiscard]] std::vector<std::vector<Card>> hands() const final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-12 08:50:28 +02:00
|
|
|
[[nodiscard]] std::vector<Card> cur_hand() const final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-12 08:50:28 +02:00
|
|
|
[[nodiscard]] size_t draw_pile_size() const final;
|
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
[[nodiscard]] hand_index_t find_card_in_hand(const Card & card) const final;
|
|
|
|
|
|
|
|
[[nodiscard]] bool is_trash(const Card & card) const final;
|
|
|
|
|
2024-01-12 14:27:16 +01:00
|
|
|
/** Returns whether the card is critical, assuming that it is non-trash */
|
|
|
|
[[nodiscard]] bool is_critical(const Card & card) const final;
|
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
[[nodiscard]] bool is_playable(const Card & card) const final;
|
|
|
|
|
2023-08-11 14:24:17 +02:00
|
|
|
[[nodiscard]] bool is_relative_state_initialized() const final;
|
2023-08-05 12:19:34 +02:00
|
|
|
|
2023-08-06 22:06:58 +02:00
|
|
|
[[nodiscard]] std::uint64_t enumerated_states() const final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
|
|
|
[[nodiscard]] const std::unordered_map<unsigned long, probability_t> & position_tablebase() const final;
|
2023-08-05 13:51:55 +02:00
|
|
|
|
2023-08-11 14:39:55 +02:00
|
|
|
void init_backtracking_information() final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-11 15:41:03 +02:00
|
|
|
probability_t evaluate_state() final;
|
|
|
|
|
2023-08-12 18:48:01 +02:00
|
|
|
[[nodiscard]] std::optional<probability_t> lookup() const final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-12 18:48:01 +02:00
|
|
|
[[nodiscard]] std::uint64_t unique_id() const final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-12 18:48:01 +02:00
|
|
|
[[nodiscard]] std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const final;
|
2023-08-11 15:41:03 +02:00
|
|
|
|
2023-08-11 16:54:11 +02:00
|
|
|
std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions() final;
|
2023-11-16 16:20:04 +01:00
|
|
|
|
|
|
|
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
|
|
|
|
possible_next_states(hand_index_t index, bool play) final;
|
2023-08-06 10:23:29 +02:00
|
|
|
|
2023-08-06 22:06:58 +02:00
|
|
|
auto operator<=>(const HanabiState &) const = default;
|
2023-08-05 12:19:34 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
protected:
|
|
|
|
void print(std::ostream & os) const final;
|
|
|
|
|
|
|
|
private:
|
|
|
|
struct BacktrackAction
|
|
|
|
{
|
|
|
|
explicit BacktrackAction(
|
|
|
|
ActionType action_type
|
|
|
|
, Card discarded_or_played = Cards::unknown
|
|
|
|
, hand_index_t index = 0
|
|
|
|
, bool was_on_8_clues = false
|
|
|
|
, bool strike = false
|
|
|
|
);
|
|
|
|
|
|
|
|
ActionType action_type{};
|
|
|
|
// The card that was discarded or played
|
|
|
|
Card discarded{};
|
|
|
|
// Index of card in hand that was discarded or played
|
|
|
|
hand_index_t index{};
|
|
|
|
|
|
|
|
// Indicates whether before the action was taken, we had 8 clues.
|
|
|
|
// This is important so that we know if we go back to 7 or 8 clues when we revert playing a 5
|
|
|
|
bool was_on_8_clues{false};
|
|
|
|
|
|
|
|
// Indicates whether playing this card triggered a bomb.
|
|
|
|
// This cannot be deduced just from the stacks since we cannot differentiate between a card
|
|
|
|
// having been played correctly or the top card of the draw pile being bombed.
|
|
|
|
bool strike{false};
|
2023-08-11 11:43:05 +02:00
|
|
|
};
|
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
// This keeps track of the representation of the gamestate relative to some starting state
|
|
|
|
// and is used for id calculation
|
|
|
|
struct RelativeRepresentationData
|
|
|
|
{
|
|
|
|
static constexpr player_t draw_pile = num_players;
|
|
|
|
static constexpr player_t discard_pile = num_players + 1;
|
|
|
|
static constexpr player_t play_stack = num_players + 2;
|
|
|
|
enum CardPosition : uint8_t
|
|
|
|
{
|
|
|
|
hand = 0, played = 1, discarded = 2
|
|
|
|
};
|
|
|
|
// List of unique non-trash cards in draw pile
|
|
|
|
boost::container::static_vector<Card, 30> good_cards_draw;
|
|
|
|
|
|
|
|
// Card positions of these cards. Indexes correspond to the cards stored in _good_cards_draw vector
|
|
|
|
boost::container::static_vector<boost::container::static_vector<player_t, max_card_duplicity>
|
|
|
|
, 30> card_positions_draw;
|
|
|
|
|
2024-01-12 23:07:36 +01:00
|
|
|
|
|
|
|
// List of all non-trash cards in hands of base state
|
|
|
|
boost::container::static_vector<Card, num_players * hand_size> good_cards_hands;
|
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
// This will indicate whether cards that were in hands initially still are in hand
|
2024-01-12 23:07:36 +01:00
|
|
|
// The first n entries are used and cards are assumed to have been marked with their indices in this vector
|
2023-11-16 16:20:04 +01:00
|
|
|
boost::container::static_vector<CardPosition, num_players * hand_size> card_positions_hands{};
|
|
|
|
|
|
|
|
// Note this is not the same as _good_cards_draw.size(), since this accounts for multiplicities
|
|
|
|
std::uint8_t initial_draw_pile_size{0};
|
|
|
|
|
|
|
|
// Whether we initialized the values above and marked cards accordingly
|
|
|
|
bool initialized{false};
|
|
|
|
};
|
2023-08-11 13:47:57 +02:00
|
|
|
|
2023-11-11 13:07:15 +01:00
|
|
|
unsigned long discard_and_potentially_update(hand_index_t index, bool cycle = false);
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-11-11 13:07:15 +01:00
|
|
|
unsigned long play_and_potentially_update(hand_index_t index, bool cycle = false);
|
2023-08-11 14:24:17 +02:00
|
|
|
|
2023-11-14 14:17:58 +01:00
|
|
|
unsigned draw(hand_index_t index, bool cycle = false, bool played = true);
|
2023-08-07 00:06:50 +02:00
|
|
|
|
2023-11-14 14:17:58 +01:00
|
|
|
void revert_draw(hand_index_t index, Card discarded_card, bool cycle = false, bool played = true);
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-11 14:24:17 +02:00
|
|
|
void revert_clue();
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-11-11 12:15:54 +01:00
|
|
|
void revert_discard(bool cycle = false);
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-11-11 12:15:54 +01:00
|
|
|
void revert_play(bool cycle = false);
|
2023-08-11 14:24:17 +02:00
|
|
|
|
2023-08-05 12:19:34 +02:00
|
|
|
|
2023-08-10 18:23:33 +02:00
|
|
|
void update_tablebase(unsigned long id, probability_t probability);
|
|
|
|
|
2023-08-11 18:28:12 +02:00
|
|
|
template<class Function>
|
|
|
|
void do_for_each_potential_draw(hand_index_t index, bool play, Function f);
|
2023-08-10 18:23:33 +02:00
|
|
|
|
2023-08-05 12:19:34 +02:00
|
|
|
void incr_turn();
|
2023-11-16 16:20:04 +01:00
|
|
|
|
2023-08-05 12:19:34 +02:00
|
|
|
void decr_turn();
|
|
|
|
|
2023-11-11 11:44:37 +01:00
|
|
|
void check_draw_pile_integrity() const;
|
|
|
|
|
2024-01-12 18:05:15 +01:00
|
|
|
probability_t check_play_or_discard(hand_index_t index, bool play);
|
|
|
|
|
2023-08-07 11:48:39 +02:00
|
|
|
static constexpr uint8_t no_endgame = std::numeric_limits<uint8_t>::max();
|
|
|
|
|
2023-08-11 13:47:57 +02:00
|
|
|
// Usual game state
|
2024-01-09 00:36:32 +01:00
|
|
|
clue_t const _clues_gained_on_discard_or_stack_finished { 1 };
|
|
|
|
uint8_t const _score_goal{};
|
|
|
|
|
2023-08-05 12:19:34 +02:00
|
|
|
player_t _turn{};
|
|
|
|
clue_t _num_clues{};
|
2023-08-06 11:54:57 +02:00
|
|
|
std::uint8_t _weighted_draw_pile_size{};
|
2023-08-05 12:19:34 +02:00
|
|
|
Stacks<num_suits> _stacks{};
|
2023-08-05 13:51:55 +02:00
|
|
|
std::array<std::array<Card, hand_size>, num_players> _hands{};
|
2023-08-05 12:19:34 +02:00
|
|
|
std::list<CardMultiplicity> _draw_pile{};
|
2023-08-06 22:06:58 +02:00
|
|
|
std::uint8_t _endgame_turns_left{};
|
2023-08-05 12:19:34 +02:00
|
|
|
|
2024-01-12 14:27:16 +01:00
|
|
|
// This will actually not always be updated exactly, but only for those cards that are not
|
|
|
|
// trash yet, since for trash, this is simply not interesting.
|
|
|
|
// Thus, we only need to update this on discards or misplays.
|
|
|
|
CardArray<num_suits, int8_t> _num_copies_left {0};
|
|
|
|
|
2023-08-11 13:47:57 +02:00
|
|
|
// further values of game state that are technically determined, but we update them anyway
|
2023-08-08 00:44:50 +02:00
|
|
|
int8_t _pace{};
|
2023-08-06 11:54:57 +02:00
|
|
|
uint8_t _score{};
|
2023-08-05 12:19:34 +02:00
|
|
|
|
2023-08-11 13:47:57 +02:00
|
|
|
// For reverting the current game
|
|
|
|
std::stack<BacktrackAction> _actions_log;
|
2023-08-07 12:48:25 +02:00
|
|
|
|
2023-08-11 13:47:57 +02:00
|
|
|
// For calculating ids of states during backtracking
|
|
|
|
RelativeRepresentationData _relative_representation;
|
|
|
|
|
|
|
|
// Lookup table for states. Uses the ids calculated using the relative representation
|
2023-08-10 11:14:15 +02:00
|
|
|
std::unordered_map<unsigned long, probability_t> _position_tablebase;
|
2023-08-10 12:06:13 +02:00
|
|
|
|
2023-11-16 16:20:04 +01:00
|
|
|
std::uint64_t _enumerated_states{};
|
|
|
|
};
|
|
|
|
|
|
|
|
template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
|
|
|
|
bool same_up_to_discard_permutation(
|
|
|
|
HanabiState<num_suits, num_players, hand_size> state1, HanabiState<num_suits
|
|
|
|
, num_players
|
|
|
|
, hand_size> state2
|
|
|
|
)
|
|
|
|
{
|
|
|
|
auto comp = [](CardMultiplicity & m1, CardMultiplicity & m2) -> bool {
|
|
|
|
return m1.card.suit < m2.card.suit || (m1.card.suit == m2.card.suit and m1.card.rank < m2.card.rank) ||
|
|
|
|
(m1.card.suit == m2.card.suit and m1.card.rank == m2.card.rank and m1.multiplicity < m2.multiplicity);
|
2023-08-06 14:06:41 +02:00
|
|
|
};
|
2023-11-16 16:20:04 +01:00
|
|
|
state1._draw_pile.sort(comp);
|
|
|
|
state2._draw_pile.sort(comp);
|
|
|
|
return state1 == state2;
|
|
|
|
}
|
2023-08-06 14:06:41 +02:00
|
|
|
|
2023-08-04 16:28:41 +02:00
|
|
|
|
2023-08-05 00:34:31 +02:00
|
|
|
}
|
|
|
|
|
2023-08-05 11:55:46 +02:00
|
|
|
#include "game_state.hpp"
|
|
|
|
|
2023-08-05 00:34:31 +02:00
|
|
|
#endif // DYNAMIC_PROGRAM_GAME_STATE_H
|