Store rationals without denominator

Instead of storing a rational for every game state, we just store how many of the factorial(draw pile size) possible game states can be won. This allows us to save only one 64-bit integer per game state instead of two and thus reduces the memory consumption of the program significantly. It also makes some computations a bit easier, since we do not have to normalize when recursing - we can just add up the number of winnable states for each possible draw. On the other hand, this means that upon lookup, we have to normalize the stored values again to retrieve the probabilities. In particular, one needs to know the draw pile size of a game state in order to interpret the value stored for that state.
2024-02-09 15:53:06 +01:00 · 2024-02-09 15:53:06 +01:00 · 5c4a2bb4f7
commit 5c4a2bb4f7
parent afb6fee540
4 changed files with 89 additions and 33 deletions
--- a/include/factorial.h
+++ b/include/factorial.h
@ -0,0 +1,42 @@
+//
+// Created by maximilian on 2/9/24.
+//
+
+#ifndef ENDGAME_ANALYZER_FACTORIAL_H
+#define ENDGAME_ANALYZER_FACTORIAL_H
+
+#include <cstdint>
+
+namespace Factorial {  // Exact factorials as 64-bit unsigned integers.
+  inline std::uint64_t factorial(std::size_t n)  // Returns n! by table lookup; n must be at most 20.
+  {
+    static const std::uint64_t table[] = {  // table[i] == i! for i = 0..20 (21 entries)
+      1,  // 0!
+      1 ,
+      2 ,
+      6 ,
+      24 ,
+      120 ,
+      720 ,
+      5040 ,
+      40320 ,
+      362880 ,
+      3628800 ,
+      39916800 ,
+      479001600 ,
+      6227020800 ,
+      87178291200 ,
+      1307674368000 ,
+      20922789888000 ,
+      355687428096000 ,
+      6402373705728000 ,
+      121645100408832000 ,
+      2432902008176640000 ,  // 20!, the last factorial below 2^64 (21! would overflow)
+    };
+    assert(n <= 20);  // NOTE(review): this header only includes <cstdint>; assert needs <cassert> (and std::size_t <cstddef>), so it relies on the includer. With NDEBUG the bound is not checked.
+    return table[n];  // out-of-bounds read if n > 20 and the assert is compiled out
+  }
+}
+
+
+#endif //ENDGAME_ANALYZER_FACTORIAL_H
--- a/include/game_interface.h
+++ b/include/game_interface.h
@ -85,7 +85,7 @@ namespace Hanabi

    [[nodiscard]] virtual std::uint64_t enumerated_states() const = 0;

-    [[nodiscard]] virtual const map_type<unsigned long, probability_t> & position_tablebase() const = 0;
+    [[nodiscard]] virtual const map_type<unsigned long, std::uint64_t> & position_tablebase() const = 0;

    virtual void init_backtracking_information() = 0;

--- a/include/game_state.h
+++ b/include/game_state.h
@ -115,7 +115,7 @@ namespace Hanabi

    [[nodiscard]] std::uint64_t enumerated_states() const final;

-    [[nodiscard]] const map_type<unsigned long, probability_t> & position_tablebase() const final;
+    [[nodiscard]] const map_type<unsigned long, std::uint64_t> & position_tablebase() const final;

    void init_backtracking_information() final;

@ -212,7 +212,7 @@ namespace Hanabi
    void revert_play(bool cycle = false);


-    void update_tablebase(unsigned long id, probability_t probability);
+    void update_tablebase(unsigned long id, std::uint64_t probability);

    template<class Function>
    void do_for_each_potential_draw(hand_index_t index, bool play, Function f);
@ -223,7 +223,7 @@ namespace Hanabi

    void check_draw_pile_integrity() const;

-    probability_t check_play_or_discard(hand_index_t index, bool play);
+    std::uint64_t check_play_or_discard(hand_index_t index, bool play);

    // For the current state, returns whether we will save it in the lookup table.
    // By default, this is just constant true, but we might want to trade memory for speed, i.e.
@ -231,6 +231,9 @@ namespace Hanabi
    // when re-visiting the states.
    bool save_state_to_map();

+    std::uint64_t internal_evaluate_state();
+    [[nodiscard]] std::optional<std::uint64_t> internal_lookup() const;
+
    static constexpr uint8_t no_endgame = std::numeric_limits<uint8_t>::max();

    // Usual game state
@ -263,7 +266,7 @@ namespace Hanabi

    // Lookup table for states. Uses the ids calculated using the relative representation
    bool const _save_memory;
-    map_type<unsigned long, probability_t> _position_tablebase;
+    map_type<unsigned long, std::uint64_t> _position_tablebase;

    std::uint64_t _enumerated_states{};
  };
--- a/include/game_state.hpp
+++ b/include/game_state.hpp
@ -14,6 +14,8 @@
 #define CHECK_DRAW_PILE_INTEGRITY
 #endif

+#include "factorial.h"
+
 namespace Hanabi
 {

@ -942,10 +944,24 @@ namespace Hanabi

  template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
  std::optional<probability_t> HanabiState<num_suits, num_players, hand_size>::lookup() const  // Winning probability of the current state, or nullopt if internal_lookup() yields no value.
+  {
+    std::optional<uint64_t> res = internal_lookup();  // raw, un-normalized number of winnable states
+    if (res.has_value())
+    {
+      return probability_t(res.value()) / Factorial::factorial(_weighted_draw_pile_size);  // normalize the count into a probability; factorial() only supports arguments up to 20
+    }
+    else
+    {
+      return std::nullopt;  // no value available for this state
+    }
+  }
+
+  template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
+  std::optional<std::uint64_t> HanabiState<num_suits, num_players, hand_size>::internal_lookup() const
  {
    if (_score == 5 * num_suits)
    {
-      return 1;
+      return Factorial::factorial(_weighted_draw_pile_size);
    }
    if (_pace < 0 or _endgame_turns_left == 0)
    {
@ -997,20 +1013,21 @@ namespace Hanabi

  template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
  probability_t HanabiState<num_suits, num_players, hand_size>::evaluate_state()  // Evaluates the current state and returns its winning probability.
+  {
+    std::uint64_t num_wins = internal_evaluate_state();  // number of winnable states out of factorial(_weighted_draw_pile_size)
+    return probability_t(num_wins)/ Factorial::factorial(_weighted_draw_pile_size);  // NOTE(review): assumes _weighted_draw_pile_size is unchanged after internal_evaluate_state() returns - confirm; factorial() only supports arguments up to 20
+  }
+
+  template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
+  std::uint64_t HanabiState<num_suits, num_players, hand_size>::internal_evaluate_state()
  {
    ASSERT(_relative_representation.initialized);
    _enumerated_states++;
    const unsigned long id_of_state = unique_id();

-    const unsigned id = 55032;
-    if (id_of_state == id)
-    {
-      std::cout << "Found state with id of " << id << "\n" << *this << std::endl;
-    }
-
    if (_score == _score_goal)
    {
-      return 1;
+      return Factorial::factorial(_weighted_draw_pile_size);
    }
    if (_pace < 0 || _endgame_turns_left == 0)
    {
@ -1024,7 +1041,7 @@ namespace Hanabi

    // TODO: Have some endgame analysis here?

-    probability_t best_probability = 0;
+    std::uint64_t best_probability = 0;
    const std::array<Card, hand_size> & hand = _hands[_turn];

    // First, check for playables
@ -1036,10 +1053,10 @@ namespace Hanabi
        if (is_trash(hand[index])) {
          played_trash = true;
        }
-        probability_t const probability_play = check_play_or_discard(index, true);
+        std::uint64_t const probability_play = check_play_or_discard(index, true);

        best_probability = std::max(best_probability, probability_play);
-        if (best_probability == 1)
+        if (best_probability == Factorial::factorial(_weighted_draw_pile_size))
        {
          update_tablebase(id_of_state, best_probability);
          return best_probability;
@ -1084,10 +1101,10 @@ namespace Hanabi

      // Discard if we found trash now
      if (discard_index != invalid_index) {
-        probability_t const probability_discard = check_play_or_discard(discard_index, play_card_instead_of_discarding);
+        std::uint64_t const probability_discard = check_play_or_discard(discard_index, play_card_instead_of_discarding);

        best_probability = std::max(best_probability, probability_discard);
-        if (best_probability == 1)
+        if (best_probability == Factorial::factorial(_weighted_draw_pile_size))
        {
          update_tablebase(id_of_state, best_probability);
          return best_probability;
@ -1097,10 +1114,10 @@ namespace Hanabi
        // sacrifice cards in hand
        for(hand_index_t index = 0; index < hand_size; ++index) {
          if(!is_critical(hand[index])) {
-            probability_t const probability_sacrifice = check_play_or_discard(index, play_card_instead_of_discarding);
+            std::uint64_t const probability_sacrifice = check_play_or_discard(index, play_card_instead_of_discarding);

            best_probability = std::max(best_probability, probability_sacrifice);
-            if (best_probability == 1)
+            if (best_probability == Factorial::factorial(_weighted_draw_pile_size))
            {
              update_tablebase(id_of_state, best_probability);
              return best_probability;
@ -1114,14 +1131,9 @@ namespace Hanabi
    if (_num_clues >= clue_t(1))
    {
      give_clue();
-      const probability_t probability_stall = evaluate_state();
+      const std::uint64_t probability_stall = internal_evaluate_state();
      revert_clue();
      best_probability = std::max(best_probability, probability_stall);
-      if (best_probability == 1)
-      {
-        update_tablebase(id_of_state, best_probability);
-        return best_probability;
-      };
    }

    update_tablebase(id_of_state, best_probability);
@ -1129,15 +1141,14 @@ namespace Hanabi
  }

  template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
-  probability_t HanabiState<num_suits, num_players, hand_size>::check_play_or_discard(hand_index_t index, bool play) {
-    probability_t sum_of_probabilities = 0;
+  std::uint64_t HanabiState<num_suits, num_players, hand_size>::check_play_or_discard(hand_index_t index, bool play) {  // Returns the multiplicity-weighted sum of internal_evaluate_state() over all potential draws for this action.
+    std::uint64_t sum_of_probabilities = 0;  // despite the name, this now accumulates a count of winnable states, not a probability

    do_for_each_potential_draw(index, play, [this, &sum_of_probabilities](const unsigned long multiplicity) {
-      sum_of_probabilities += evaluate_state() * multiplicity;
+      sum_of_probabilities += internal_evaluate_state() * multiplicity;  // weight the count of each resulting state by the multiplicity of that draw
    });

-    const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
-    return sum_of_probabilities / total_weight;
+    return sum_of_probabilities;  // no division here any more: lookup() and evaluate_state() divide by factorial(_weighted_draw_pile_size) when converting to a probability
  }

  template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
@ -1349,7 +1360,7 @@ namespace Hanabi
  }

  template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
-  const map_type<unsigned long, probability_t> &
+  const map_type<unsigned long, std::uint64_t> &
  HanabiState<num_suits, num_players, hand_size>::position_tablebase() const
  {
    return _position_tablebase;
@ -1369,7 +1380,7 @@ namespace Hanabi

  template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
  void HanabiState<num_suits, num_players, hand_size>::update_tablebase(
-        unsigned long id, Hanabi::probability_t probability
+        unsigned long id, std::uint64_t probability
  )
  {
    // This macro can be activated if we want to dump details on all game states visited for analysis purposes.