expand getting actions: include non-reasonable actions on request
This commit is contained in:
parent
35b78cb4db
commit
84b95e7214
6 changed files with 47 additions and 19 deletions
|
@ -80,7 +80,7 @@ namespace Hanabi
|
||||||
|
|
||||||
[[nodiscard]] virtual std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const = 0;
|
[[nodiscard]] virtual std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const = 0;
|
||||||
|
|
||||||
virtual std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions(bool evaluate_all = false) = 0;
|
virtual std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions(bool evaluate_all = false, bool reasonable = true) = 0;
|
||||||
|
|
||||||
virtual std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
|
virtual std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
|
||||||
possible_next_states(hand_index_t index, bool play) = 0;
|
possible_next_states(hand_index_t index, bool play) = 0;
|
||||||
|
|
|
@ -127,7 +127,7 @@ namespace Hanabi
|
||||||
|
|
||||||
[[nodiscard]] std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const final;
|
[[nodiscard]] std::pair<std::vector<std::uint64_t>, std::vector<Card>> dump_unique_id_parts() const final;
|
||||||
|
|
||||||
std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions(bool evaluate_all = false) final;
|
std::vector<std::pair<Action, std::optional<probability_t>>> get_reasonable_actions(bool evaluate_all = false, bool reasonable = true) final;
|
||||||
|
|
||||||
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
|
std::vector<std::pair<CardMultiplicity, std::optional<probability_t>>>
|
||||||
possible_next_states(hand_index_t index, bool play) final;
|
possible_next_states(hand_index_t index, bool play) final;
|
||||||
|
|
|
@ -765,20 +765,22 @@ namespace Hanabi
|
||||||
|
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
std::vector<std::pair<Action, std::optional<probability_t>>>
|
std::vector<std::pair<Action, std::optional<probability_t>>>
|
||||||
HanabiState<num_suits, num_players, hand_size>::get_reasonable_actions(bool evaluate_all)
|
HanabiState<num_suits, num_players, hand_size>::get_reasonable_actions(bool evaluate_all, bool reasonable)
|
||||||
{
|
{
|
||||||
std::vector<std::pair<Action, std::optional<probability_t>>> reasonable_actions{};
|
std::vector<std::pair<Action, std::optional<probability_t>>> actions{};
|
||||||
|
|
||||||
if (_score == _score_goal or _pace < 0 or _endgame_turns_left == 0)
|
if (_score == _score_goal or _pace < 0 or _endgame_turns_left == 0)
|
||||||
{
|
{
|
||||||
return reasonable_actions;
|
return actions;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::array<Card, hand_size> & hand = _hands[_turn];
|
const std::array<Card, hand_size> & hand = _hands[_turn];
|
||||||
// First, check for playable cards
|
// First, check for playable cards
|
||||||
for (std::uint8_t index = 0; index < hand_size; index++)
|
for (std::uint8_t index = 0; index < hand_size; index++)
|
||||||
{
|
{
|
||||||
if (is_playable(hand[index]))
|
Card const & card = hand[index];
|
||||||
|
bool const consider_playing = is_playable(hand[index]) or (not is_critical(card) and not reasonable);
|
||||||
|
if (consider_playing)
|
||||||
{
|
{
|
||||||
const Action action = {ActionType::play, hand[index]};
|
const Action action = {ActionType::play, hand[index]};
|
||||||
bool known = true;
|
bool known = true;
|
||||||
|
@ -805,11 +807,11 @@ namespace Hanabi
|
||||||
{
|
{
|
||||||
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
|
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
|
||||||
const probability_t probability_play = sum_of_probabilities / total_weight;
|
const probability_t probability_play = sum_of_probabilities / total_weight;
|
||||||
reasonable_actions.emplace_back(action, probability_play);
|
actions.emplace_back(action, probability_play);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
reasonable_actions.emplace_back(action, std::nullopt);
|
actions.emplace_back(action, std::nullopt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -817,11 +819,36 @@ namespace Hanabi
|
||||||
// Check for discards
|
// Check for discards
|
||||||
if (_pace > 0 and _num_clues < max_num_clues)
|
if (_pace > 0 and _num_clues < max_num_clues)
|
||||||
{
|
{
|
||||||
|
auto trash_it = std::find_if(hand.begin(), hand.end(),[this](Card const & card){return is_trash(card);});
|
||||||
|
bool const trash_in_hand = trash_it != hand.end();
|
||||||
|
|
||||||
|
bool discarded_trash = false;
|
||||||
|
std::vector<Card> discarded;
|
||||||
for (std::uint8_t index = 0; index < hand_size; index++)
|
for (std::uint8_t index = 0; index < hand_size; index++)
|
||||||
{
|
{
|
||||||
if (is_trash(hand[index]))
|
Card card = hand[index];
|
||||||
|
// We only consider discarding if
|
||||||
|
// - the card is trash, and we have not listed a trash discard yet
|
||||||
|
// - the card is neither trash nor critical, we have not listed the same card yet, and either there is no trash in hand or non-reasonable actions were requested
|
||||||
|
bool const consider_discarding = (is_trash(card) and not discarded_trash)
|
||||||
|
or ((not trash_in_hand or not reasonable) and (not is_trash(card) and not is_critical(card) and std::find(discarded.begin(), discarded.end(), card) == discarded.end()));
|
||||||
|
|
||||||
|
if (consider_discarding)
|
||||||
{
|
{
|
||||||
const Action action = {ActionType::discard, hand[index]};
|
if (is_trash(card))
|
||||||
|
{
|
||||||
|
// Replace the card by the generic trash placeholder: this normalizes what we discard, which also simplifies the printing routines later.
|
||||||
|
// Also note that this card is automatically identified as trash by the is_trash() method,
|
||||||
|
// so it is handled properly by other parts of the program if it is passed back in as input.
|
||||||
|
card = Cards::trash;
|
||||||
|
discarded_trash = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
discarded.push_back(card);
|
||||||
|
}
|
||||||
|
|
||||||
|
const Action action = {ActionType::discard, card};
|
||||||
bool known = true;
|
bool known = true;
|
||||||
probability_t sum_of_probabilities = 0;
|
probability_t sum_of_probabilities = 0;
|
||||||
|
|
||||||
|
@ -846,15 +873,12 @@ namespace Hanabi
|
||||||
{
|
{
|
||||||
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
|
const unsigned long total_weight = std::max(static_cast<unsigned long>(_weighted_draw_pile_size), 1ul);
|
||||||
const probability_t probability_discard = sum_of_probabilities / total_weight;
|
const probability_t probability_discard = sum_of_probabilities / total_weight;
|
||||||
reasonable_actions.emplace_back(action, probability_discard);
|
actions.emplace_back(action, probability_discard);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
reasonable_actions.emplace_back(action, std::nullopt);
|
actions.emplace_back(action, std::nullopt);
|
||||||
}
|
}
|
||||||
|
|
||||||
// All discards are equivalent, do not continue searching for different trash
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -869,10 +893,10 @@ namespace Hanabi
|
||||||
prob = lookup();
|
prob = lookup();
|
||||||
}
|
}
|
||||||
const Action action = {ActionType::clue, Cards::unknown};
|
const Action action = {ActionType::clue, Cards::unknown};
|
||||||
reasonable_actions.emplace_back(action, prob);
|
actions.emplace_back(action, prob);
|
||||||
revert_clue();
|
revert_clue();
|
||||||
}
|
}
|
||||||
return reasonable_actions;
|
return actions;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
template<suit_t num_suits, player_t num_players, hand_index_t hand_size>
|
||||||
|
|
|
@ -144,7 +144,7 @@ namespace Hanabi
|
||||||
static constexpr Card t4 = {5, 1};
|
static constexpr Card t4 = {5, 1};
|
||||||
static constexpr Card t5 = {5, 0};
|
static constexpr Card t5 = {5, 0};
|
||||||
static constexpr Card unknown = {std::numeric_limits<suit_t>::max(), 0};
|
static constexpr Card unknown = {std::numeric_limits<suit_t>::max(), 0};
|
||||||
static constexpr Card trash = {std::numeric_limits<suit_t>::max(), 1};
|
static constexpr Card trash = {std::numeric_limits<suit_t>::max(), std::numeric_limits<rank_t>::max() };
|
||||||
}
|
}
|
||||||
|
|
||||||
//// INLINE SECTION
|
//// INLINE SECTION
|
||||||
|
|
|
@ -131,7 +131,7 @@ namespace Hanabi
|
||||||
bool reached = game.goto_turn(turn);
|
bool reached = game.goto_turn(turn);
|
||||||
game.state->evaluate_state();
|
game.state->evaluate_state();
|
||||||
ASSERT(reached);
|
ASSERT(reached);
|
||||||
for (auto const & [action, probability] : game.state->get_reasonable_actions(true)) {
|
for (auto const & [action, probability] : game.state->get_reasonable_actions(true, false)) {
|
||||||
std::cout << "Turn " << turn << ", " << action << ": ";
|
std::cout << "Turn " << turn << ", " << action << ": ";
|
||||||
print_probability(std::cout, probability) << std::endl;
|
print_probability(std::cout, probability) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,10 @@ namespace Hanabi
|
||||||
break;
|
break;
|
||||||
case ActionType::discard:
|
case ActionType::discard:
|
||||||
os << "discard";
|
os << "discard";
|
||||||
|
if (action.card != Cards::trash)
|
||||||
|
{
|
||||||
|
os << " " + to_string(action.card);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case ActionType::clue:
|
case ActionType::clue:
|
||||||
os << "clue";
|
os << "clue";
|
||||||
|
|
Loading…
Reference in a new issue