// Endgame-Analyzer/include/game_state.h

#ifndef DYNAMIC_PROGRAM_GAME_STATE_H
#define DYNAMIC_PROGRAM_GAME_STATE_H

#include <array>
#include <bitset>
#include <cstdint>
#include <limits>
#include <list>
#include <optional>
#include <ostream>
#include <stack>
#include <unordered_map>
#include <vector>
#include <memory>

#include <boost/container/static_vector.hpp>
#include <boost/rational.hpp>

#include "game_interface.h"

namespace Hanabi
{

  /// Fixed-size array holding one rank_t entry for each of the num_suits suits.
  template<std::size_t num_suits>
  using Stacks = std::array<rank_t, num_suits>;

  template<size_t num_suits>
  std::ostream & operator<<(std::ostream & os, const Stacks<num_suits> & stacks);

  /// Trait selecting the per-suit storage type used by CardArray.
  /// For a general element type T, N elements are stored in a std::array<T, N>.
  template<class T>
  struct InnerCardArray
  {
    template<std::size_t N>
    using array_t = std::array<T, N>;
  };

  /// Specialization for bool: N flags are stored in a std::bitset<N>
  /// rather than in a std::array<bool, N>.
  template<>
  struct InnerCardArray<bool>
  {
    template<std::size_t N>
    using array_t = std::bitset<N>;
  };

  template<suit_t num_suits, typename T>
  struct CardArray
  {
    using value_type = T;

    CardArray() = default;

    explicit CardArray(value_type default_val);

    void fill(value_type val);

    const value_type & operator[](const Card & card) const;

    value_type & operator[](const Card & card);

    auto operator<=>(const CardArray &) const = default;

  private:
    using inner_array_t = typename InnerCardArray<T>::template array_t<starting_card_rank>;
    std::array<inner_array_t, num_suits> _array{};
  };


// A HanabiState holds a game position together with a log of the actions that led to it and
// allows traversing the game history by making and reverting the stored actions.
// The template parameters fix the number of suits, the number of players and the hand size.
// Every member function marked `final` overrides a virtual function inherited via HanabiStateIF.
// NOTE(review): the member definitions are not in this header; presumably they live in
// game_state.hpp, which is included at the end of this file.
  template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
  class HanabiState : public HanabiStateIF
  {
  public:
    HanabiState() = default;

    // Constructs a state from a deck. The score goal defaults to 5 * num_suits and the number of
    // clues gained on a discard or a finished stack defaults to 1.
    explicit HanabiState(const std::vector<Card> & deck, uint8_t score_goal = 5 * num_suits, clue_t num_clues_gained_on_discard_or_stack_finished = 1);

    // --- Actions that advance the game (overrides of HanabiStateIF) ---

    void give_clue() final;

    void discard(hand_index_t index) final;

    void play(hand_index_t index) final;

    void rotate_next_draw(const Card & card) final;

    ActionType last_action_type() const final;

    // Presumably undoes the most recent action recorded in _actions_log - confirm in definition.
    void revert() final;

    void modify_clues(clue_t change) final;

    void set_clues(clue_t clues) final;

    // --- Read-only queries on the current position ---

    [[nodiscard]] player_t turn() const final;

    [[nodiscard]] clue_t num_clues() const final;

    [[nodiscard]] unsigned score() const final;

    [[nodiscard]] std::vector<std::vector<Card>> hands() const final;

    [[nodiscard]] std::vector<Card> cur_hand() const final;

    [[nodiscard]] size_t draw_pile_size() const final;

    [[nodiscard]] hand_index_t find_card_in_hand(const Card & card) const final;

    [[nodiscard]] bool is_trash(const Card & card) const final;

    /** Returns whether the card is critical, assuming that it is non-trash */
    [[nodiscard]] bool is_critical(const Card & card) const final;

    [[nodiscard]] bool is_playable(const Card & card) const final;

    [[nodiscard]] bool is_relative_state_initialized() const final;

    [[nodiscard]] std::uint64_t enumerated_states() const final;

    // Read access to the lookup table of evaluated positions.
    // NOTE(review): the keys are `unsigned long` whereas unique_id() returns std::uint64_t; these
    // types differ in width on platforms where long is 32 bits - confirm this is intended.
    [[nodiscard]] const std::unordered_map<unsigned long, probability_t> & position_tablebase() const final;

    // --- Backtracking / evaluation ---

    void init_backtracking_information() final;

    probability_t evaluate_state() final;

    [[nodiscard]] std::optional<probability_t> lookup() const final;

    [[nodiscard]] std::uint64_t unique_id() const final;

    [[nodiscard]] std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const final;

    std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions() final;

    std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
    possible_next_states(hand_index_t index, bool play) final;

    // Defaulted member-wise comparison (this also implicitly declares a defaulted operator==).
    // NOTE(review): a defaulted comparison is defined as deleted unless every base and member is
    // comparable; BacktrackAction and RelativeRepresentationData declare no comparison operators
    // in this header - confirm that this operator is actually usable.
    auto operator<=>(const HanabiState &) const = default;

  protected:
    void print(std::ostream & os) const final;

  private:
    // One entry of the action log (_actions_log); it carries the information stored for
    // reverting the action.
    struct BacktrackAction
    {
      // NOTE(review): discarded_or_played is presumably stored in the member `discarded`;
      // the constructor definition is not visible here.
      explicit BacktrackAction(
            ActionType action_type
            , Card discarded_or_played = Cards::unknown
            , hand_index_t index = 0
            , bool was_on_8_clues = false
            , bool strike = false
      );

      ActionType action_type{};
      // The card that was discarded or played
      Card discarded{};
      // Index of card in hand that was discarded or played
      hand_index_t index{};

      // Indicates whether before the action was taken, we had 8 clues.
      // This is important so that we know if we go back to 7 or 8 clues when we revert playing a 5
      bool was_on_8_clues{false};

      // Indicates whether playing this card triggered a bomb.
      // This cannot be deduced just from the stacks since we cannot differentiate between a card
      // having been played correctly or the top card of the draw pile being bombed.
      bool strike{false};
    };

    // This keeps track of the representation of the gamestate relative to some starting state
    // and is used for id calculation
    struct RelativeRepresentationData
    {
      // Location codes that lie just past the valid player indices 0 .. num_players - 1.
      // NOTE(review): presumably these are the non-hand values stored in card_positions_draw;
      // confirm against the definitions.
      static constexpr player_t draw_pile = num_players;
      static constexpr player_t discard_pile = num_players + 1;
      static constexpr player_t play_stack = num_players + 2;
      // Location of a card that started out in a hand (element type of card_positions_hands).
      enum CardPosition : uint8_t
      {
        hand = 0, played = 1, discarded = 2
      };
      // List of unique non-trash cards in draw pile.
      // NOTE(review): the capacity of 30 is hard-coded; confirm it suffices for all supported
      // numbers of suits.
      boost::container::static_vector<Card, 30> good_cards_draw;

      // Card positions of these cards. Indexes correspond to the cards stored in the
      // good_cards_draw vector; each entry holds up to max_card_duplicity positions.
      boost::container::static_vector<boost::container::static_vector<player_t, max_card_duplicity>
            , 30> card_positions_draw;

      // Position (hand / played / discarded) of the cards that were in hands initially; the
      // capacity is num_players * hand_size.
      // NOTE(review): an earlier comment described this as a bitset indexed by marks placed on
      // the cards; the indexing scheme is not visible in this header - confirm.
      boost::container::static_vector<CardPosition, num_players * hand_size> card_positions_hands{};

      // Note this is not the same as good_cards_draw.size(), since this accounts for multiplicities
      std::uint8_t initial_draw_pile_size{0};

      // Whether we initialized the values above and marked cards accordingly
      bool initialized{false};
    };

    // --- Internal helpers (defined out of line) ---
    // NOTE(review): the exact meaning of the `cycle` and `played` flags and of the returned
    // values is not visible in this header; consult the definitions.

    unsigned long discard_and_potentially_update(hand_index_t index, bool cycle = false);

    unsigned long play_and_potentially_update(hand_index_t index, bool cycle = false);

    unsigned draw(hand_index_t index, bool cycle = false, bool played = true);

    void revert_draw(hand_index_t index, Card discarded_card, bool cycle = false, bool played = true);

    void revert_clue();

    void revert_discard(bool cycle = false);

    void revert_play(bool cycle = false);


    void update_tablebase(unsigned long id, probability_t probability);

    template<class Function>
    void do_for_each_potential_draw(hand_index_t index, bool play, Function f);

    void incr_turn();

    void decr_turn();

    void check_draw_pile_integrity() const;

    probability_t check_play_or_discard(hand_index_t index, bool play);

    // Largest uint8_t value, used as a sentinel.
    // NOTE(review): presumably stored in _endgame_turns_left while the final round has not
    // started yet - confirm.
    static constexpr uint8_t no_endgame = std::numeric_limits<uint8_t>::max();

    // Usual game state
    // The two const members below can only be set during construction; they also make the class
    // non-assignable.
    clue_t const _clues_gained_on_discard_or_stack_finished { 1 };
    uint8_t const _score_goal{};

    player_t _turn{};
    clue_t _num_clues{};
    std::uint8_t _weighted_draw_pile_size{};
    Stacks<num_suits> _stacks{};
    std::array<std::array<Card, hand_size>, num_players> _hands{};
    std::list<CardMultiplicity> _draw_pile{};
    std::uint8_t _endgame_turns_left{};

    // This will actually not always be updated exactly, but only for those cards that are not
    // trash yet, since for trash, this is simply not interesting.
    // Thus, we only need to update this on discards or misplays.
    CardArray<num_suits, int8_t> _num_copies_left {0};

    // further values of game state that are technically determined, but we update them anyway
    int8_t _pace{};
    uint8_t _score{};

    // For reverting the current game
    std::stack<BacktrackAction> _actions_log;

    // For calculating ids of states during backtracking
    RelativeRepresentationData _relative_representation;

    // Lookup table for states. Uses the ids calculated using the relative representation
    std::unordered_map<unsigned long, probability_t> _position_tablebase;

    // Counter exposed through enumerated_states().
    std::uint64_t _enumerated_states{};
  };

  template<std::size_t num_suits, player_t num_players, std::size_t hand_size>
  bool same_up_to_discard_permutation(
        HanabiState<num_suits, num_players, hand_size> state1, HanabiState<num_suits
        , num_players
        , hand_size> state2
  )
  {
    auto comp = [](CardMultiplicity & m1, CardMultiplicity & m2) -> bool {
      return m1.card.suit < m2.card.suit || (m1.card.suit == m2.card.suit and m1.card.rank < m2.card.rank) ||
             (m1.card.suit == m2.card.suit and m1.card.rank == m2.card.rank and m1.multiplicity < m2.multiplicity);
    };
    state1._draw_pile.sort(comp);
    state2._draw_pile.sort(comp);
    return state1 == state2;
  }


}

#include "game_state.hpp"

#endif // DYNAMIC_PROGRAM_GAME_STATE_H