From 84b95e7214f9436b973483462e214f32a0add983 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Ke=C3=9Fler?= <git@maximilian-kessler.de>
Date: Sat, 13 Jan 2024 00:48:57 +0100
Subject: [PATCH] expand getting actions: include non-reasonable actions on
 request

---
 include/game_interface.h       |  2 +-
 include/game_state.h           |  2 +-
 include/game_state.hpp         | 54 ++++++++++++++++++++++++----------
 include/hanabi_types.hpp       |  2 +-
 src/command_line_interface.cpp |  2 +-
 src/hanabi_types.cpp           |  4 +++
 6 files changed, 47 insertions(+), 19 deletions(-)
diff --git a/include/game_interface.h b/include/game_interface.h
index 1e511f1..ab831f5 100644
--- a/include/game_interface.h
+++ b/include/game_interface.h
@@ -80,7 +80,7 @@ namespace Hanabi
 
     [[nodiscard]] virtual std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const = 0;
 
-    virtual std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions(bool evaluate_all = false) = 0;
+    virtual std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions(bool evaluate_all = false, bool reasonable = true) = 0;
 
     virtual std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
     possible_next_states(hand_index_t index, bool play) = 0;
diff --git a/include/game_state.h b/include/game_state.h
index ee88e7e..b3e11f7 100644
--- a/include/game_state.h
+++ b/include/game_state.h
@@ -127,7 +127,7 @@ namespace Hanabi
 
     [[nodiscard]] std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const final;
 
-    std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions(bool evaluate_all = false) final;
+    std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions(bool evaluate_all = false, bool reasonable = true) final;
 
     std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
     possible_next_states(hand_index_t index, bool play) final;
diff --git a/include/game_state.hpp b/include/game_state.hpp
index e1016ce..7d3e9a0 100644
--- a/include/game_state.hpp
+++ b/include/game_state.hpp
@@ -765,20 +765,22 @@ namespace Hanabi
 
   template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
   std::vector<std::pair<Action, std::optional<probability_t>>>
-  HanabiState<num_suits, num_players, hand_size>::get_reasonable_actions(bool evaluate_all)
+  HanabiState<num_suits, num_players, hand_size>::get_reasonable_actions(bool evaluate_all, bool reasonable)
   {
-    std::vector<std::pair<Action, std::optional<probability_t>>> reasonable_actions{};
+    std::vector<std::pair<Action, std::optional<probability_t>>> actions{};
 
     if (_score == _score_goal or _pace < 0 or _endgame_turns_left == 0)
     {
-      return reasonable_actions;
+      return actions;
     }
 
     const std::array<Card, hand_size> & hand = _hands[_turn];
     // First, check for playable cards
     for (std::uint8_t index = 0; index < hand_size; index++)
     {
-      if (is_playable(hand[index]))
+      Card const & card = hand[index];
+      bool const consider_playing = is_playable(hand[index]) or (not is_critical(card) and not reasonable);
+      if (consider_playing)
       {
         const Action action = {ActionType::play, hand[index]};
         bool known = true;
@@ -805,11 +807,11 @@ namespace Hanabi
         {
           const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
           const probability_t probability_play = sum_of_probabilities / total_weight;
-          reasonable_actions.emplace_back(action, probability_play);
+          actions.emplace_back(action, probability_play);
         }
         else
         {
-          reasonable_actions.emplace_back(action, std::nullopt);
+          actions.emplace_back(action, std::nullopt);
         }
       }
     }
@@ -817,11 +819,36 @@ namespace Hanabi
     // Check for discards
     if (_pace > 0 and _num_clues < max_num_clues)
     {
+      auto trash_it = std::find_if(hand.begin(), hand.end(),[this](Card const & card){return is_trash(card);});
+      bool const trash_in_hand = trash_it != hand.end();
+
+      bool discarded_trash = false;
+      std::vector<Card> discarded;
       for (std::uint8_t index = 0; index < hand_size; index++)
       {
-        if (is_trash(hand[index]))
+        Card card = hand[index];
+        // We only consider discarding if
+        // - the card is trash, and we have not listed a trash discard yet
+        // - the card is neither trash nor critical, we have not listed the same card yet, and either there is no trash in hand or non-reasonable actions were requested
+        bool const consider_discarding = (is_trash(card) and not discarded_trash)
+              or ((not trash_in_hand or not reasonable) and (not is_trash(card) and not is_critical(card) and std::find(discarded.begin(), discarded.end(), card) == discarded.end()));
+
+        if (consider_discarding)
         {
-          const Action action = {ActionType::discard, hand[index]};
+          if (is_trash(card))
+          {
+            // Replace the concrete card by Cards::trash: this normalizes what we discard and therefore also helps later printing routines.
+            // Also note that Cards::trash is automatically identified as trash by the is_trash() method,
+            // so it is properly handled by other parts of the program if it is input again.
+            card = Cards::trash;
+            discarded_trash = true;
+          }
+          else
+          {
+            discarded.push_back(card);
+          }
+
+          const Action action = {ActionType::discard, card};
           bool known = true;
           probability_t sum_of_probabilities = 0;
 
@@ -846,15 +873,12 @@ namespace Hanabi
           {
             const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
             const probability_t probability_discard = sum_of_probabilities / total_weight;
-            reasonable_actions.emplace_back(action, probability_discard);
+            actions.emplace_back(action, probability_discard);
           }
           else
           {
-            reasonable_actions.emplace_back(action, std::nullopt);
+            actions.emplace_back(action, std::nullopt);
           }
-
-          // All discards are equivalent, do not continue searching for different trash
-          break;
         }
       }
     }
@@ -869,10 +893,10 @@ namespace Hanabi
         prob = lookup();
       }
       const Action action = {ActionType::clue, Cards::unknown};
-      reasonable_actions.emplace_back(action, prob);
+      actions.emplace_back(action, prob);
       revert_clue();
     }
-    return reasonable_actions;
+    return actions;
   }
 
   template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
diff --git a/include/hanabi_types.hpp b/include/hanabi_types.hpp
index 4eaadc1..f1d55e0 100644
--- a/include/hanabi_types.hpp
+++ b/include/hanabi_types.hpp
@@ -144,7 +144,7 @@ namespace Hanabi
     static constexpr Card t4 = {5, 1};
     static constexpr Card t5 = {5, 0};
     static constexpr Card unknown = {std::numeric_limits<suit_t>::max(), 0};
-    static constexpr Card trash = {std::numeric_limits<suit_t>::max(), 1};
+    static constexpr Card trash = {std::numeric_limits<suit_t>::max(), std::numeric_limits<rank_t>::max() };
   }
 
   //// INLINE SECTION
diff --git a/src/command_line_interface.cpp b/src/command_line_interface.cpp
index c32ca1a..12d5958 100644
--- a/src/command_line_interface.cpp
+++ b/src/command_line_interface.cpp
@@ -131,7 +131,7 @@ namespace Hanabi
         bool reached = game.goto_turn(turn);
         game.state->evaluate_state();
         ASSERT(reached);
-        for (auto const & [action, probability] : game.state->get_reasonable_actions(true)) {
+        for (auto const & [action, probability] : game.state->get_reasonable_actions(true, false)) {
           std::cout << "Turn " << turn << ", " << action << ": ";
           print_probability(std::cout, probability) << std::endl;
         }
diff --git a/src/hanabi_types.cpp b/src/hanabi_types.cpp
index 3a8aa4c..70bff7a 100644
--- a/src/hanabi_types.cpp
+++ b/src/hanabi_types.cpp
@@ -11,6 +11,10 @@ namespace Hanabi
         break;
       case ActionType::discard:
         os << "discard";
+        if (action.card != Cards::trash)
+        {
+          os << " " + to_string(action.card);
+        }
         break;
       case ActionType::clue:
         os << "clue";