Improve ask_question strategy
I also replaced the callback architecture behind `PublicInformation.ask_questions()` with a simpler method `PublicInformation.ask_question()` that gets called repeatedly. To make all float-based sorts easier, I used the `float-ord` package. I also used it to clean up some of the sorting in `decide_wrapped()`.
This commit is contained in:
parent
051ac7a097
commit
9364a06862
6 changed files with 132 additions and 163 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -3,6 +3,11 @@ name = "crossbeam"
|
|||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "float-ord"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.6"
|
||||
|
@ -39,6 +44,7 @@ name = "rust_hanabi"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -47,6 +53,7 @@ dependencies = [
|
|||
|
||||
[metadata]
|
||||
"checksum crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "348228ce9f93d20ffc30c18e575f82fa41b9c8bf064806c65d41eba4771595a0"
|
||||
"checksum float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bad48618fdb549078c333a7a8528acb57af271d0433bdecd523eb620628364e"
|
||||
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
|
||||
"checksum getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9047cfbd08a437050b363d35ef160452c5fe8ea5187ae0a624708c91581d685"
|
||||
"checksum libc 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4870ef6725dde13394134e587e4ab4eca13cb92e916209a31c851b49131d3c75"
|
||||
|
|
|
@ -8,4 +8,5 @@ rand = "*"
|
|||
log = "*"
|
||||
getopts = "*"
|
||||
fnv = "*"
|
||||
float-ord = "*"
|
||||
crossbeam = "0.2.5"
|
||||
|
|
|
@ -73,5 +73,5 @@ On the first 20000 seeds, we have these average scores and win rates:
|
|||
|---------|---------|---------|---------|---------|
|
||||
| cheat | 24.8594 | 24.9785 | 24.9720 | 24.9557 |
|
||||
| | 90.59 % | 98.17 % | 97.76 % | 96.42 % |
|
||||
| info | 22.3736 | 24.7840 | 24.9261 | 24.9160 |
|
||||
| | 10.41 % | 84.14 % | 94.33 % | 93.49 % |
|
||||
| info | 22.5194 | 24.7942 | 24.9354 | 24.9220 |
|
||||
| | 12.58 % | 84.46 % | 95.03 % | 94.01 % |
|
||||
|
|
|
@ -4,6 +4,7 @@ extern crate log;
|
|||
extern crate rand;
|
||||
extern crate crossbeam;
|
||||
extern crate fnv;
|
||||
extern crate float_ord;
|
||||
|
||||
mod helpers;
|
||||
mod game;
|
||||
|
|
|
@ -117,18 +117,37 @@ pub trait PublicInformation: Clone {
|
|||
|
||||
fn agrees_with(&self, other: Self) -> bool;
|
||||
|
||||
/// By defining `ask_questions`, we decides which `Question`s a player learns the answers to.
|
||||
/// By defining `ask_question`, we decide which `Question`s a player learns the answers to.
|
||||
///
|
||||
/// A player "asks" a question by calling the callback. Questions can depend on the answers to
|
||||
/// earlier questions: We are given a `&mut HandInfo<CardPossibilityTable>` that we'll have to pass
|
||||
/// to that callback; there, it will be modified to reflect the answer to the question. Note that `self`
|
||||
/// is not modified and thus reflects the state before any player "asked" any question.
|
||||
/// Whenever we need to compute a "hat value", this method will be called repeatedly, either
|
||||
/// until the information runs out, or until it returns `None`. These questions can depend on
|
||||
/// the answers to earlier questions: We are given a `&HandInfo<CardPossibilityTable>` that
|
||||
/// reflects the answers to previous questions for the same "hat value computation".
|
||||
///
|
||||
/// The product of the `info_amount()`s of all questions we have may not exceed `total_info`.
|
||||
/// For convenience, we pass a `&mut u32` to the callback, and it will be updated to the
|
||||
/// "remaining" information amount.
|
||||
fn ask_questions<Callback>(&self, &Player, &mut HandInfo<CardPossibilityTable>, Callback, total_info: u32)
|
||||
where Callback: FnMut(&mut HandInfo<CardPossibilityTable>, &mut u32, Box<Question>);
|
||||
/// Note that `self` does not reflect the answers to previous questions; it reflects the state
|
||||
/// before the entire "hat value" calculation.
|
||||
fn ask_question(&self, &Player, &HandInfo<CardPossibilityTable>, total_info: u32) -> Option<Box<Question>>;
|
||||
|
||||
fn ask_question_wrapper(&self, player: &Player, hand_info: &HandInfo<CardPossibilityTable>, total_info: u32)
|
||||
-> Option<Box<Question>>
|
||||
{
|
||||
assert!(total_info > 0);
|
||||
if total_info == 1 {
|
||||
None
|
||||
} else {
|
||||
let result = self.ask_question(player, hand_info, total_info);
|
||||
if let Some(ref question) = result {
|
||||
if question.info_amount() > total_info {
|
||||
panic!("ask_question returned question with info_amount = {} > total_info = {}!",
|
||||
question.info_amount(), total_info);
|
||||
}
|
||||
if question.info_amount() == 1 {
|
||||
panic!("ask_question returned a trivial question!");
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
fn set_player_infos(&mut self, infos: Vec<(Player, HandInfo<CardPossibilityTable>)>) {
|
||||
for (player, new_hand_info) in infos {
|
||||
|
@ -142,14 +161,10 @@ pub trait PublicInformation: Clone {
|
|||
) -> ModulusInformation {
|
||||
assert!(player != &view.player);
|
||||
let mut answer_info = ModulusInformation::none();
|
||||
{
|
||||
let callback = |hand_info: &mut HandInfo<CardPossibilityTable>, info_remaining: &mut u32, question: Box<Question>| {
|
||||
let new_answer_info = question.answer_info(view.get_hand(player), view.get_board());
|
||||
question.acknowledge_answer_info(new_answer_info.clone(), hand_info, view.get_board());
|
||||
answer_info.combine(new_answer_info, total_info);
|
||||
*info_remaining = answer_info.info_remaining(total_info);
|
||||
};
|
||||
self.ask_questions(player, hand_info, callback, total_info);
|
||||
while let Some(question) = self.ask_question_wrapper(player, hand_info, answer_info.info_remaining(total_info)) {
|
||||
let new_answer_info = question.answer_info(view.get_hand(player), view.get_board());
|
||||
question.acknowledge_answer_info(new_answer_info.clone(), hand_info, view.get_board());
|
||||
answer_info.combine(new_answer_info, total_info);
|
||||
}
|
||||
answer_info.cast_up(total_info);
|
||||
answer_info
|
||||
|
@ -162,14 +177,9 @@ pub trait PublicInformation: Clone {
|
|||
board: &BoardState,
|
||||
mut info: ModulusInformation,
|
||||
) {
|
||||
let total_info = info.modulus;
|
||||
{
|
||||
let callback = |hand_info: &mut HandInfo<CardPossibilityTable>, info_remaining: &mut u32, question: Box<Question>| {
|
||||
let answer_info = info.split(question.info_amount());
|
||||
question.acknowledge_answer_info(answer_info, hand_info, board);
|
||||
*info_remaining = info.modulus;
|
||||
};
|
||||
self.ask_questions(player, hand_info, callback, total_info);
|
||||
while let Some(question) = self.ask_question_wrapper(player, hand_info, info.modulus) {
|
||||
let answer_info = info.split(question.info_amount());
|
||||
question.acknowledge_answer_info(answer_info, hand_info, board);
|
||||
}
|
||||
assert!(info.value == 0);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use fnv::{FnvHashMap, FnvHashSet};
|
||||
use std::cmp::Ordering;
|
||||
use float_ord::*;
|
||||
|
||||
use strategy::*;
|
||||
use game::*;
|
||||
|
@ -420,12 +421,6 @@ impl MyPublicInformation {
|
|||
})
|
||||
}
|
||||
|
||||
fn knows_dead_card(&self, player: &Player) -> bool {
|
||||
self.hand_info[player].iter().any(|table| {
|
||||
table.probability_is_dead(&self.board) == 1.0
|
||||
})
|
||||
}
|
||||
|
||||
fn someone_else_needs_hint(&self, view: &OwnedGameView) -> bool {
|
||||
// Does another player have a playable card, but doesn't know it?
|
||||
view.get_other_players().iter().any(|player| {
|
||||
|
@ -518,117 +513,86 @@ impl PublicInformation for MyPublicInformation {
|
|||
*self == other
|
||||
}
|
||||
|
||||
fn ask_questions<Callback>(
|
||||
fn ask_question(
|
||||
&self,
|
||||
me: &Player,
|
||||
hand_info: &mut HandInfo<CardPossibilityTable>,
|
||||
mut ask_question: Callback,
|
||||
mut info_remaining: u32,
|
||||
) where Callback: FnMut(&mut HandInfo<CardPossibilityTable>, &mut u32, Box<Question>) {
|
||||
_me: &Player,
|
||||
hand_info: &HandInfo<CardPossibilityTable>,
|
||||
total_info: u32,
|
||||
) -> Option<Box<Question>> {
|
||||
// Changing anything inside this function will not break the information transfer
|
||||
// mechanisms!
|
||||
|
||||
let compute_augmented_hand_info = |hand_info: &HandInfo<CardPossibilityTable>| {
|
||||
hand_info.iter().cloned().enumerate()
|
||||
.map(|(i, card_table)| {
|
||||
let p_play = card_table.probability_is_playable(&self.board);
|
||||
let p_dead = card_table.probability_is_dead(&self.board);
|
||||
let is_determined = card_table.is_determined();
|
||||
(card_table, i, p_play, p_dead, is_determined)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
let augmented_hand_info_raw = hand_info.iter().cloned().enumerate().filter_map(|(i, card_table)| {
|
||||
let p_play = card_table.probability_is_playable(&self.board);
|
||||
let p_dead = card_table.probability_is_dead(&self.board);
|
||||
Some((i, p_play, p_dead))
|
||||
}).collect::<Vec<_>>();
|
||||
let know_playable_card = augmented_hand_info_raw.iter().any(|&(_, p_play, _)| p_play == 1.0);
|
||||
let know_dead_card = augmented_hand_info_raw.iter().any(|&(_, _, p_dead)| p_dead == 1.0);
|
||||
|
||||
if !self.knows_playable_card(me) { // TODO: changing this to "if true {" slightly improves the three-player game and
|
||||
// very slightly worsens the other cases. There probably is some
|
||||
// other way to make this decision that's better in all cases.
|
||||
let augmented_hand_info = compute_augmented_hand_info(hand_info);
|
||||
let mut ask_play = augmented_hand_info.iter()
|
||||
.filter(|&&(_, _, p_play, p_dead, is_determined)| {
|
||||
if is_determined { return false; }
|
||||
if p_dead == 1.0 { return false; }
|
||||
if p_play == 1.0 || p_play < 0.2 { return false; }
|
||||
true
|
||||
}).collect::<Vec<_>>();
|
||||
// sort by probability of play, then by index
|
||||
ask_play.sort_by(|&&(_, i1, p1, _, _), &&(_, i2, p2, _, _)| {
|
||||
// It's better to include higher-probability-of-playability
|
||||
// cards into our combo question, since that maximizes our
|
||||
// chance of finding out about a playable card.
|
||||
let result = p2.partial_cmp(&p1);
|
||||
if result == None || result == Some(Ordering::Equal) {
|
||||
i1.cmp(&i2)
|
||||
} else {
|
||||
result.unwrap()
|
||||
}
|
||||
});
|
||||
// We don't need to find out anything about cards that are determined or dead.
|
||||
let augmented_hand_info = augmented_hand_info_raw.into_iter().filter(|&(i, _, p_dead)| {
|
||||
if p_dead == 1.0 { false }
|
||||
else if hand_info[i].is_determined() { false }
|
||||
else { true }
|
||||
}).collect::<Vec<_>>();
|
||||
|
||||
if self.board.num_players == 5 {
|
||||
for &(_, i, _, _, _) in ask_play {
|
||||
ask_question(hand_info, &mut info_remaining, Box::new(q_is_playable(i)));
|
||||
if info_remaining <= 1 { return; }
|
||||
}
|
||||
} else {
|
||||
let mut rest_combo = AdditiveComboQuestion {questions: Vec::new()};
|
||||
for &(_, i, _, _, _) in ask_play {
|
||||
if rest_combo.info_amount() < info_remaining {
|
||||
rest_combo.questions.push(Box::new(q_is_playable(i)));
|
||||
}
|
||||
}
|
||||
rest_combo.questions.reverse(); // It's better to put lower-probability-of-playability
|
||||
// cards first: The difference only matters if we
|
||||
// find a playable card, and conditional on that,
|
||||
// it's better to find out about as many non-playable
|
||||
// cards as possible.
|
||||
if rest_combo.info_amount() < info_remaining && !self.knows_dead_card(me) {
|
||||
let mut ask_dead = augmented_hand_info.iter()
|
||||
.filter(|&&(_, _, _, p_dead, _)| {
|
||||
p_dead > 0.0 && p_dead < 1.0
|
||||
}).collect::<Vec<_>>();
|
||||
// sort by probability of death, then by index
|
||||
ask_dead.sort_by(|&&(_, i1, _, d1, _), &&(_, i2, _, d2, _)| {
|
||||
let result = d2.partial_cmp(&d1);
|
||||
if result == None || result == Some(Ordering::Equal) {
|
||||
i1.cmp(&i2)
|
||||
} else {
|
||||
result.unwrap()
|
||||
}
|
||||
});
|
||||
for &(_, i, _, _, _) in ask_dead {
|
||||
if rest_combo.info_amount() < info_remaining {
|
||||
rest_combo.questions.push(Box::new(q_is_dead(i)));
|
||||
}
|
||||
}
|
||||
}
|
||||
ask_question(hand_info, &mut info_remaining, Box::new(rest_combo));
|
||||
if info_remaining <= 1 { return; }
|
||||
if !know_playable_card {
|
||||
// Vector of tuples (ask_dead, i, p_yes), where ask_dead=false means we'll
|
||||
// ask if the card at i is playable, and ask_dead=true means we ask if the card at i is
|
||||
// dead. p_yes is the probability the answer is nonzero.
|
||||
let mut to_ask: Vec<(bool, usize, f32)> = augmented_hand_info.iter().filter_map(|&(i, p_play, _)| {
|
||||
if p_play == 0.0 { None }
|
||||
else { Some((false, i, p_play)) }
|
||||
}).collect();
|
||||
if !know_dead_card {
|
||||
to_ask.extend(augmented_hand_info.iter().filter_map(|&(i, _, p_dead)| {
|
||||
if p_dead == 0.0 { None }
|
||||
else { Some((true, i, p_dead)) }
|
||||
}));
|
||||
}
|
||||
|
||||
let combo_question_capacity = (total_info - 1) as usize;
|
||||
if to_ask.len() > combo_question_capacity {
|
||||
// The questions don't fit into an AdditiveComboQuestion.
|
||||
// Sort by type (ask_dead=false first), then by p_yes (bigger first)
|
||||
to_ask.sort_by_key(|&(ask_dead, _, p_yes)| {(ask_dead, FloatOrd(-p_yes))});
|
||||
to_ask.truncate(combo_question_capacity);
|
||||
}
|
||||
|
||||
// Sort by type (ask_dead=false first), then by p_yes (smaller first), since it's
|
||||
// better to put lower-probability-of-playability/death cards first: The difference
|
||||
// only matters if we find a playable/dead card, and conditional on that, it's better
|
||||
// to find out about as many non-playable/non-dead cards as possible.
|
||||
to_ask.sort_by_key(|&(ask_dead, _, p_yes)| {(ask_dead, FloatOrd(p_yes))});
|
||||
let questions = to_ask.into_iter().map(|(ask_dead, i, _)| -> Box<Question> {
|
||||
if ask_dead { Box::new(q_is_dead(i)) }
|
||||
else { Box::new(q_is_playable(i)) }
|
||||
}).collect::<Vec<_>>();
|
||||
if questions.len() > 0 {
|
||||
return Some(Box::new(AdditiveComboQuestion { questions }))
|
||||
}
|
||||
}
|
||||
|
||||
// Recompute augmented_hand_info, incorporating the things we learned when asking questions
|
||||
let augmented_hand_info = compute_augmented_hand_info(hand_info);
|
||||
let mut ask_partition = augmented_hand_info.iter()
|
||||
.filter(|&&(_, _, _, p_dead, is_determined)| {
|
||||
if is_determined { return false }
|
||||
// TODO: possibly still valuable to ask?
|
||||
if p_dead == 1.0 { return false }
|
||||
true
|
||||
}).collect::<Vec<_>>();
|
||||
// sort by probability of play, then by index
|
||||
ask_partition.sort_by(|&&(_, i1, p1, _, _), &&(_, i2, p2, _, _)| {
|
||||
// *higher* probabilities are better
|
||||
let result = p2.partial_cmp(&p1);
|
||||
if result == None || result == Some(Ordering::Equal) {
|
||||
i1.cmp(&i2)
|
||||
} else {
|
||||
result.unwrap()
|
||||
}
|
||||
});
|
||||
let ask_play_score = |p_play: f32| FloatOrd((p_play-0.7).abs());
|
||||
let mut ask_play = augmented_hand_info.iter().filter(|&&(_, p_play, _)| {
|
||||
ask_play_score(p_play) < FloatOrd(0.2)
|
||||
}).cloned().collect::<Vec<_>>();
|
||||
ask_play.sort_by_key(|&(i, p_play, _)| (ask_play_score(p_play), i));
|
||||
if let Some(&(i, _, _)) = ask_play.get(0) {
|
||||
return Some(Box::new(q_is_playable(i)));
|
||||
}
|
||||
|
||||
for &(ref card_table, i, _, _, _) in ask_partition {
|
||||
let question = CardPossibilityPartition::new(i, info_remaining, &card_table, &self.board);
|
||||
ask_question(hand_info, &mut info_remaining, Box::new(question));
|
||||
if info_remaining <= 1 { return; }
|
||||
let mut ask_partition = augmented_hand_info;
|
||||
// sort by probability of death (lowest first), then by index
|
||||
ask_partition.sort_by_key(|&(i, _, p_death)| {
|
||||
(FloatOrd(p_death), i)
|
||||
});
|
||||
if let Some(&(i, _, _)) = ask_partition.get(0) {
|
||||
let question = CardPossibilityPartition::new(i, total_info, &hand_info[i], &self.board);
|
||||
Some(Box::new(question))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -814,24 +778,14 @@ impl InformationPlayerStrategy {
|
|||
// debug!("{}: {}", i, card_table);
|
||||
// }
|
||||
|
||||
let playable_cards = private_info.iter().enumerate().filter(|&(_, card_table)| {
|
||||
card_table.probability_is_playable(&view.board) == 1.0
|
||||
// If possible, play the best playable card
|
||||
// the higher the play_score, the better to play
|
||||
let mut playable_cards = private_info.iter().enumerate().filter_map(|(i, card_table)| {
|
||||
if card_table.probability_is_playable(&view.board) != 1.0 { return None; }
|
||||
Some((i, self.get_average_play_score(view, card_table)))
|
||||
}).collect::<Vec<_>>();
|
||||
|
||||
if playable_cards.len() > 0 {
|
||||
// play the best playable card
|
||||
// the higher the play_score, the better to play
|
||||
let mut play_score = -1.0;
|
||||
let mut play_index = 0;
|
||||
|
||||
for (index, card_table) in playable_cards {
|
||||
let score = self.get_average_play_score(view, card_table);
|
||||
if score > play_score {
|
||||
play_score = score;
|
||||
play_index = index;
|
||||
}
|
||||
}
|
||||
|
||||
playable_cards.sort_by_key(|&(i, play_score)| (FloatOrd(-play_score), i));
|
||||
if let Some(&(play_index, _)) = playable_cards.get(0) {
|
||||
return TurnChoice::Play(play_index)
|
||||
}
|
||||
|
||||
|
@ -901,23 +855,19 @@ impl InformationPlayerStrategy {
|
|||
return TurnChoice::Discard(useless_indices[0]);
|
||||
}
|
||||
|
||||
// Play the best discardable card
|
||||
let mut compval = 0.0;
|
||||
let mut index = 0;
|
||||
for (i, card_table) in private_info.iter().enumerate() {
|
||||
// Make the least risky discard.
|
||||
let mut cards_by_discard_value = private_info.iter().enumerate().map(|(i, card_table)| {
|
||||
let probability_is_seen = card_table.probability_of_predicate(&|card| {
|
||||
view.can_see(card)
|
||||
});
|
||||
let my_compval =
|
||||
let compval =
|
||||
20.0 * probability_is_seen
|
||||
+ 10.0 * card_table.probability_is_dispensable(&view.board)
|
||||
+ card_table.average_value();
|
||||
|
||||
if my_compval > compval {
|
||||
compval = my_compval;
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
(i, compval)
|
||||
}).collect::<Vec<_>>();
|
||||
cards_by_discard_value.sort_by_key(|&(i, compval)| (FloatOrd(-compval), i));
|
||||
let (index, _) = cards_by_discard_value[0];
|
||||
TurnChoice::Discard(index)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue