Store rationals without denominator

Instead of storing a rational for every game state,
we just store how many of the factorial(draw pile size)
game states can be won.
This allows us to save only one 64-bit integer per game state instead of
two and thus reduces memory consumption of the program significantly.
Also, this makes some computations a bit easier, since we do not have to
normalize when recursing - we can just add the number of winnable states
for each possible draw.

On the other hand, this means that upon lookup, we have to normalize the
stored values again to retrieve the probabilities.
In particular, one needs to know what the draw pile size of the game
state is in order to interpret the value of the state.
This commit is contained in:
Maximilian Keßler 2024-02-09 15:53:06 +01:00
parent afb6fee540
commit 5c4a2bb4f7
Signed by: max
GPG Key ID: BCC5A619923C0BA5
4 changed files with 89 additions and 33 deletions

42
include/factorial.h Normal file
View File

@ -0,0 +1,42 @@
//
// Created by maximilian on 2/9/24.
//
#ifndef ENDGAME_ANALYZER_FACTORIAL_H
#define ENDGAME_ANALYZER_FACTORIAL_H
#include <cassert>
#include <cstddef>
#include <cstdint>
namespace Factorial {

  namespace detail {
    // Exact values of n! for n = 0, ..., 20.
    // 20! is the largest factorial that fits into an unsigned 64-bit integer
    // (21! already exceeds 2^64 - 1), so the table cannot be extended further.
    inline constexpr std::uint64_t factorial_table[] = {
        1,                    //  0!
        1,                    //  1!
        2,                    //  2!
        6,                    //  3!
        24,                   //  4!
        120,                  //  5!
        720,                  //  6!
        5040,                 //  7!
        40320,                //  8!
        362880,               //  9!
        3628800,              // 10!
        39916800,             // 11!
        479001600,            // 12!
        6227020800,           // 13!
        87178291200,          // 14!
        1307674368000,        // 15!
        20922789888000,       // 16!
        355687428096000,      // 17!
        6402373705728000,     // 18!
        121645100408832000,   // 19!
        2432902008176640000,  // 20!
    };

    // Number of entries in the table; the valid arguments are 0 .. size - 1.
    inline constexpr std::size_t factorial_table_size =
        sizeof(factorial_table) / sizeof(factorial_table[0]);

    static_assert(factorial_table_size == 21, "factorial table must cover exactly 0! through 20!");
  }

  /**
   * @brief Returns n! as an exact 64-bit unsigned integer by table lookup.
   *
   * The function is constexpr, so it can be evaluated at compile time as well
   * as at run time.
   *
   * @param n Argument of the factorial. Must satisfy n <= 20.
   * @return The value of n!.
   *
   * @note The precondition is checked with assert() only: in builds with NDEBUG
   *       defined, passing n > 20 reads past the end of the table.
   */
  constexpr std::uint64_t factorial(std::size_t n)
  {
    // The bound is derived from the table itself, so the two cannot get out of sync.
    assert(n < detail::factorial_table_size);
    return detail::factorial_table[n];
  }
}
#endif //ENDGAME_ANALYZER_FACTORIAL_H

View File

@ -85,7 +85,7 @@ namespace Hanabi
[[nodiscard]] virtual std::uint64_t enumerated_states() const = 0;
[[nodiscard]] virtual const map_type<unsigned long, probability_t> & position_tablebase() const = 0;
[[nodiscard]] virtual const map_type<unsigned long, std::uint64_t> & position_tablebase() const = 0;
virtual void init_backtracking_information() = 0;

View File

@ -115,7 +115,7 @@ namespace Hanabi
[[nodiscard]] std::uint64_t enumerated_states() const final;
[[nodiscard]] const map_type<unsigned long, probability_t> & position_tablebase() const final;
[[nodiscard]] const map_type<unsigned long, std::uint64_t> & position_tablebase() const final;
void init_backtracking_information() final;
@ -212,7 +212,7 @@ namespace Hanabi
void revert_play(bool cycle = false);
void update_tablebase(unsigned long id, probability_t probability);
void update_tablebase(unsigned long id, std::uint64_t probability);
template<class Function>
void do_for_each_potential_draw(hand_index_t index, bool play, Function f);
@ -223,7 +223,7 @@ namespace Hanabi
void check_draw_pile_integrity() const;
probability_t check_play_or_discard(hand_index_t index, bool play);
std::uint64_t check_play_or_discard(hand_index_t index, bool play);
// For the current state, returns whether we will save it in the lookup table.
// By default, this is just constant true, but we might want to trade memory for speed, i.e.
@ -231,6 +231,9 @@ namespace Hanabi
// when re-visiting the states.
bool save_state_to_map();
std::uint64_t internal_evaluate_state();
[[nodiscard]] std::optional<std::uint64_t> internal_lookup() const;
static constexpr uint8_t no_endgame = std::numeric_limits<uint8_t>::max();
// Usual game state
@ -263,7 +266,7 @@ namespace Hanabi
// Lookup table for states. Uses the ids calculated using the relative representation
bool const _save_memory;
map_type<unsigned long, probability_t> _position_tablebase;
map_type<unsigned long, std::uint64_t> _position_tablebase;
std::uint64_t _enumerated_states{};
};

View File

@ -14,6 +14,8 @@
#define CHECK_DRAW_PILE_INTEGRITY
#endif
#include "factorial.h"
namespace Hanabi
{
@ -942,10 +944,24 @@ namespace Hanabi
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::optional<probability_t> HanabiState<num_suits, num_players, hand_size>::lookup() const
{
    // internal_lookup() reports the raw stored value: a count of winnable states
    // rather than a probability. An empty optional is passed on unchanged.
    const std::optional<std::uint64_t> stored_wins = internal_lookup();
    if (not stored_wins.has_value())
    {
        return std::nullopt;
    }

    // Normalize the count by factorial(draw pile size) to recover the probability.
    return probability_t(*stored_wins) / Factorial::factorial(_weighted_draw_pile_size);
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::optional<std::uint64_t> HanabiState<num_suits, num_players, hand_size>::internal_lookup() const
{
if (_score == 5 * num_suits)
{
return 1;
return Factorial::factorial(_weighted_draw_pile_size);
}
if (_pace < 0 or _endgame_turns_left == 0)
{
@ -997,20 +1013,21 @@ namespace Hanabi
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
probability_t HanabiState<num_suits, num_players, hand_size>::evaluate_state()
{
    // The internal evaluation yields a count of winnable states, not a probability.
    // It is run first; the normalization factor is only read afterwards.
    const std::uint64_t winnable_states = internal_evaluate_state();
    const std::uint64_t total_states = Factorial::factorial(_weighted_draw_pile_size);

    // Dividing by factorial(draw pile size) turns the count into a probability.
    return probability_t(winnable_states) / total_states;
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
std::uint64_t HanabiState<num_suits, num_players, hand_size>::internal_evaluate_state()
{
ASSERT(_relative_representation.initialized);
_enumerated_states++;
const unsigned long id_of_state = unique_id();
const unsigned id = 55032;
if (id_of_state == id)
{
std::cout << "Found state with id of " << id << "\n" << *this << std::endl;
}
if (_score == _score_goal)
{
return 1;
return Factorial::factorial(_weighted_draw_pile_size);
}
if (_pace < 0 || _endgame_turns_left == 0)
{
@ -1024,7 +1041,7 @@ namespace Hanabi
// TODO: Have some endgame analysis here?
probability_t best_probability = 0;
std::uint64_t best_probability = 0;
const std::array<Card, hand_size> & hand = _hands[_turn];
// First, check for playables
@ -1036,10 +1053,10 @@ namespace Hanabi
if (is_trash(hand[index])) {
played_trash = true;
}
probability_t const probability_play = check_play_or_discard(index, true);
std::uint64_t const probability_play = check_play_or_discard(index, true);
best_probability = std::max(best_probability, probability_play);
if (best_probability == 1)
if (best_probability == Factorial::factorial(_weighted_draw_pile_size))
{
update_tablebase(id_of_state, best_probability);
return best_probability;
@ -1084,10 +1101,10 @@ namespace Hanabi
// Discard if we found trash now
if (discard_index != invalid_index) {
probability_t const probability_discard = check_play_or_discard(discard_index, play_card_instead_of_discarding);
std::uint64_t const probability_discard = check_play_or_discard(discard_index, play_card_instead_of_discarding);
best_probability = std::max(best_probability, probability_discard);
if (best_probability == 1)
if (best_probability == Factorial::factorial(_weighted_draw_pile_size))
{
update_tablebase(id_of_state, best_probability);
return best_probability;
@ -1097,10 +1114,10 @@ namespace Hanabi
// sacrifice cards in hand
for(hand_index_t index = 0; index < hand_size; ++index) {
if(!is_critical(hand[index])) {
probability_t const probability_sacrifice = check_play_or_discard(index, play_card_instead_of_discarding);
std::uint64_t const probability_sacrifice = check_play_or_discard(index, play_card_instead_of_discarding);
best_probability = std::max(best_probability, probability_sacrifice);
if (best_probability == 1)
if (best_probability == Factorial::factorial(_weighted_draw_pile_size))
{
update_tablebase(id_of_state, best_probability);
return best_probability;
@ -1114,14 +1131,9 @@ namespace Hanabi
if (_num_clues >= clue_t(1))
{
give_clue();
const probability_t probability_stall = evaluate_state();
const std::uint64_t probability_stall = internal_evaluate_state();
revert_clue();
best_probability = std::max(best_probability, probability_stall);
if (best_probability == 1)
{
update_tablebase(id_of_state, best_probability);
return best_probability;
};
}
update_tablebase(id_of_state, best_probability);
@ -1129,15 +1141,14 @@ namespace Hanabi
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
probability_t HanabiState<num_suits, num_players, hand_size>::check_play_or_discard(hand_index_t index, bool play) {
probability_t sum_of_probabilities = 0;
std::uint64_t HanabiState<num_suits, num_players, hand_size>::check_play_or_discard(hand_index_t index, bool play) {
std::uint64_t sum_of_probabilities = 0;
do_for_each_potential_draw(index, play, [this, &sum_of_probabilities](const unsigned long multiplicity) {
sum_of_probabilities += evaluate_state() * multiplicity;
sum_of_probabilities += internal_evaluate_state() * multiplicity;
});
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
return sum_of_probabilities / total_weight;
return sum_of_probabilities;
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
@ -1349,7 +1360,7 @@ namespace Hanabi
}
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
const map_type<unsigned long, probability_t> &
const map_type<unsigned long, std::uint64_t> &
HanabiState<num_suits, num_players, hand_size>::position_tablebase() const
{
return _position_tablebase;
@ -1369,7 +1380,7 @@ namespace Hanabi
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
void HanabiState<num_suits, num_players, hand_size>::update_tablebase(
unsigned long id, Hanabi::probability_t probability
unsigned long id, std::uint64_t probability
)
{
// This macro can be activated if we want to dump details on all game states visited for analysis purposes.