From 84b95e7214f9436b973483462e214f32a0add983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20Ke=C3=9Fler?= Date: Sat, 13 Jan 2024 00:48:57 +0100 Subject: [PATCH] expand getting actions: include non-reasonable actions on request --- include/game_interface.h | 2 +- include/game_state.h | 2 +- include/game_state.hpp | 54 ++++++++++++++++++++++++---------- include/hanabi_types.hpp | 2 +- src/command_line_interface.cpp | 2 +- src/hanabi_types.cpp | 4 +++ 6 files changed, 47 insertions(+), 19 deletions(-) diff --git a/include/game_interface.h b/include/game_interface.h index 1e511f1..ab831f5 100644 --- a/include/game_interface.h +++ b/include/game_interface.h @@ -80,7 +80,7 @@ namespace Hanabi [[nodiscard]] virtual std::pair, std::vector> dump_unique_id_parts() const = 0; - virtual std::vector>> get_reasonable_actions(bool evaluate_all = false) = 0; + virtual std::vector>> get_reasonable_actions(bool evaluate_all = false, bool reasonable = true) = 0; virtual std::vector>> possible_next_states(hand_index_t index, bool play) = 0; diff --git a/include/game_state.h b/include/game_state.h index ee88e7e..b3e11f7 100644 --- a/include/game_state.h +++ b/include/game_state.h @@ -127,7 +127,7 @@ namespace Hanabi [[nodiscard]] std::pair, std::vector> dump_unique_id_parts() const final; - std::vector>> get_reasonable_actions(bool evaluate_all = false) final; + std::vector>> get_reasonable_actions(bool evaluate_all = false, bool reasonable = true) final; std::vector>> possible_next_states(hand_index_t index, bool play) final; diff --git a/include/game_state.hpp b/include/game_state.hpp index e1016ce..7d3e9a0 100644 --- a/include/game_state.hpp +++ b/include/game_state.hpp @@ -765,20 +765,22 @@ namespace Hanabi template std::vector>> - HanabiState::get_reasonable_actions(bool evaluate_all) + HanabiState::get_reasonable_actions(bool evaluate_all, bool reasonable) { - std::vector>> reasonable_actions{}; + std::vector>> actions{}; if (_score == _score_goal or _pace < 0 or _endgame_turns_left == 0) { - return reasonable_actions; + return actions; } const std::array & hand = _hands[_turn]; // First, check for playable cards for (std::uint8_t index = 0; index < hand_size; index++) { - if (is_playable(hand[index])) + Card const & card = hand[index]; + bool const consider_playing = is_playable(hand[index]) or (not is_critical(card) and not reasonable); + if (consider_playing) { const Action action = {ActionType::play, hand[index]}; bool known = true; @@ -805,11 +807,11 @@ namespace Hanabi { const unsigned long total_weight = std::max(static_cast(_weighted_draw_pile_size), 1ul); const probability_t probability_play = sum_of_probabilities / total_weight; - reasonable_actions.emplace_back(action, probability_play); + actions.emplace_back(action, probability_play); } else { - reasonable_actions.emplace_back(action, std::nullopt); + actions.emplace_back(action, std::nullopt); } } } @@ -817,11 +819,36 @@ namespace Hanabi // Check for discards if (_pace > 0 and _num_clues < max_num_clues) { + auto trash_it = std::find_if(hand.begin(), hand.end(),[this](Card const & card){return is_trash(card);}); + bool const trash_in_hand = trash_it != hand.end(); + + bool discarded_trash = false; + std::vector discarded; for (std::uint8_t index = 0; index < hand_size; index++) { - if (is_trash(hand[index])) + Card card = hand[index]; + // We only consider discarding if + // - the card is trash, and we have not listed a trash discard yet + // - the card is not critical, and we have not listed the same card yet + bool const consider_discarding = (is_trash(card) and not discarded_trash) + or ((not trash_in_hand or not reasonable) and (not is_trash(card) and not is_critical(card) and std::find(discarded.begin(), discarded.end(), card) == discarded.end())); + + if (consider_discarding) { - const Action action = {ActionType::discard, hand[index]}; + if (is_trash(card)) + { + // This is useful for normalizing what we discard and therefore also for later printing routines. + // Also note that this card is automatically identified as trash by the is_trash() method, + // so properly handled by other parts of the program if input again. + card = Cards::trash; + discarded_trash = true; + } + else + { + discarded.push_back(card); + } + + const Action action = {ActionType::discard, card}; bool known = true; probability_t sum_of_probabilities = 0; @@ -846,15 +873,12 @@ namespace Hanabi { const unsigned long total_weight = std::max(static_cast(_weighted_draw_pile_size), 1ul); const probability_t probability_discard = sum_of_probabilities / total_weight; - reasonable_actions.emplace_back(action, probability_discard); + actions.emplace_back(action, probability_discard); } else { - reasonable_actions.emplace_back(action, std::nullopt); + actions.emplace_back(action, std::nullopt); } - - // All discards are equivalent, do not continue searching for different trash - break; } } } @@ -869,10 +893,10 @@ namespace Hanabi prob = lookup(); } const Action action = {ActionType::clue, Cards::unknown}; - reasonable_actions.emplace_back(action, prob); + actions.emplace_back(action, prob); revert_clue(); } - return reasonable_actions; + return actions; } template diff --git a/include/hanabi_types.hpp b/include/hanabi_types.hpp index 4eaadc1..f1d55e0 100644 --- a/include/hanabi_types.hpp +++ b/include/hanabi_types.hpp @@ -144,7 +144,7 @@ namespace Hanabi static constexpr Card t4 = {5, 1}; static constexpr Card t5 = {5, 0}; static constexpr Card unknown = {std::numeric_limits::max(), 0}; - static constexpr Card trash = {std::numeric_limits::max(), 1}; + static constexpr Card trash = {std::numeric_limits::max(), std::numeric_limits::max() }; } //// INLINE SECTION diff --git a/src/command_line_interface.cpp b/src/command_line_interface.cpp index c32ca1a..12d5958 100644 --- a/src/command_line_interface.cpp +++ b/src/command_line_interface.cpp @@ -131,7 +131,7 @@ namespace Hanabi bool reached = game.goto_turn(turn); game.state->evaluate_state(); ASSERT(reached); - for (auto const & [action, probability] : game.state->get_reasonable_actions(true)) { + for (auto const & [action, probability] : game.state->get_reasonable_actions(true, false)) { std::cout << "Turn " << turn << ", " << action << ": "; print_probability(std::cout, probability) << std::endl; } diff --git a/src/hanabi_types.cpp b/src/hanabi_types.cpp index 3a8aa4c..70bff7a 100644 --- a/src/hanabi_types.cpp +++ b/src/hanabi_types.cpp @@ -11,6 +11,10 @@ namespace Hanabi break; case ActionType::discard: os << "discard"; + if (action.card != Cards::trash) + { + os << " " + to_string(action.card); + } break; case ActionType::clue: os << "clue";