From 9364a0686242cf1544ee2ad211da31c3d7cde554 Mon Sep 17 00:00:00 2001 From: Felix Bauckholt Date: Fri, 8 Mar 2019 20:59:14 +0100 Subject: [PATCH] Improve ask_question strategy I also replaced the callback architecture behind `PublicInformation.ask_questions()` with a simpler method `PublicInformation.ask_question()` that gets called repeatedly. To make all float-based sorts easier, I used the `float-ord` package. I also used it to clean up some of the sorting in `decide_wrapped()`. --- Cargo.lock | 7 ++ Cargo.toml | 1 + README.md | 4 +- src/main.rs | 1 + src/strategies/hat_helpers.rs | 62 ++++++---- src/strategies/information.rs | 220 +++++++++++++--------------------- 6 files changed, 132 insertions(+), 163 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bc376d2..e063b68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,6 +3,11 @@ name = "crossbeam" version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "float-ord" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "fnv" version = "1.0.6" @@ -39,6 +44,7 @@ name = "rust_hanabi" version = "0.1.0" dependencies = [ "crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", @@ -47,6 +53,7 @@ dependencies = [ [metadata] "checksum crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "348228ce9f93d20ffc30c18e575f82fa41b9c8bf064806c65d41eba4771595a0" +"checksum float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bad48618fdb549078c333a7a8528acb57af271d0433bdecd523eb620628364e" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9047cfbd08a437050b363d35ef160452c5fe8ea5187ae0a624708c91581d685" "checksum libc 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4870ef6725dde13394134e587e4ab4eca13cb92e916209a31c851b49131d3c75" diff --git a/Cargo.toml b/Cargo.toml index 85f0aae..79c0936 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,4 +8,5 @@ rand = "*" log = "*" getopts = "*" fnv = "*" +float-ord = "*" crossbeam = "0.2.5" diff --git a/README.md b/README.md index baa7397..b5cd0f5 100644 --- a/README.md +++ b/README.md @@ -73,5 +73,5 @@ On the first 20000 seeds, we have these average scores and win rates: |---------|---------|---------|---------|---------| | cheat | 24.8594 | 24.9785 | 24.9720 | 24.9557 | | | 90.59 % | 98.17 % | 97.76 % | 96.42 % | -| info | 22.3736 | 24.7840 | 24.9261 | 24.9160 | -| | 10.41 % | 84.14 % | 94.33 % | 93.49 % | +| info | 22.5194 | 24.7942 | 24.9354 | 24.9220 | +| | 12.58 % | 84.46 % | 95.03 % | 94.01 % | diff --git a/src/main.rs b/src/main.rs index 12a05f7..86ff353 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ extern crate log; extern crate rand; extern crate crossbeam; extern crate fnv; +extern crate float_ord; mod helpers; mod game; diff --git a/src/strategies/hat_helpers.rs b/src/strategies/hat_helpers.rs index 5ba7c6f..7550a5b 100644 --- a/src/strategies/hat_helpers.rs +++ b/src/strategies/hat_helpers.rs @@ -117,18 +117,37 @@ pub trait PublicInformation: Clone { fn agrees_with(&self, other: Self) -> bool; - /// By defining `ask_questions`, we decides which `Question`s a player learns the answers to. + /// By defining `ask_question`, we decides which `Question`s a player learns the answers to. /// - /// A player "asks" a question by calling the callback. 
Questions can depend on the answers to - /// earlier questions: We are given a `&mut HandInfo` that we'll have to pass - /// to that callback; there, it will be modified to reflect the answer to the question. Note that `self` - /// is not modified and thus reflects the state before any player "asked" any question. + /// Whenever we need to compute a "hat value", this method will be called repeatedly, either + /// until the information runs out, or until it returns `None`. These questions can depend on + /// the answers to earlier questions: We are given a `&HandInfo` that + /// reflects the answers of previous questions for the same "hat value computation". /// - /// The product of the `info_amount()`s of all questions we have may not exceed `total_info`. - /// For convenience, we pass a `&mut u32` to the callback, and it will be updated to the - /// "remaining" information amount. - fn ask_questions(&self, &Player, &mut HandInfo, Callback, total_info: u32) - where Callback: FnMut(&mut HandInfo, &mut u32, Box); + /// Note that `self` does not reflect the answers to previous questions; it reflects the state + /// before the entire "hat value" calculation. 
+ fn ask_question(&self, &Player, &HandInfo, total_info: u32) -> Option>; + + fn ask_question_wrapper(&self, player: &Player, hand_info: &HandInfo, total_info: u32) + -> Option> + { + assert!(total_info > 0); + if total_info == 1 { + None + } else { + let result = self.ask_question(player, hand_info, total_info); + if let Some(ref question) = result { + if question.info_amount() > total_info { + panic!("ask_question returned question with info_amount = {} > total_info = {}!", + question.info_amount(), total_info); + } + if question.info_amount() == 1 { + panic!("ask_question returned a trivial question!"); + } + } + result + } + } fn set_player_infos(&mut self, infos: Vec<(Player, HandInfo)>) { for (player, new_hand_info) in infos { @@ -142,14 +161,10 @@ pub trait PublicInformation: Clone { ) -> ModulusInformation { assert!(player != &view.player); let mut answer_info = ModulusInformation::none(); - { - let callback = |hand_info: &mut HandInfo, info_remaining: &mut u32, question: Box| { - let new_answer_info = question.answer_info(view.get_hand(player), view.get_board()); - question.acknowledge_answer_info(new_answer_info.clone(), hand_info, view.get_board()); - answer_info.combine(new_answer_info, total_info); - *info_remaining = answer_info.info_remaining(total_info); - }; - self.ask_questions(player, hand_info, callback, total_info); + while let Some(question) = self.ask_question_wrapper(player, hand_info, answer_info.info_remaining(total_info)) { + let new_answer_info = question.answer_info(view.get_hand(player), view.get_board()); + question.acknowledge_answer_info(new_answer_info.clone(), hand_info, view.get_board()); + answer_info.combine(new_answer_info, total_info); } answer_info.cast_up(total_info); answer_info @@ -162,14 +177,9 @@ pub trait PublicInformation: Clone { board: &BoardState, mut info: ModulusInformation, ) { - let total_info = info.modulus; - { - let callback = |hand_info: &mut HandInfo, info_remaining: &mut u32, question: Box| { - let 
answer_info = info.split(question.info_amount()); - question.acknowledge_answer_info(answer_info, hand_info, board); - *info_remaining = info.modulus; - }; - self.ask_questions(player, hand_info, callback, total_info); + while let Some(question) = self.ask_question_wrapper(player, hand_info, info.modulus) { + let answer_info = info.split(question.info_amount()); + question.acknowledge_answer_info(answer_info, hand_info, board); } assert!(info.value == 0); } diff --git a/src/strategies/information.rs b/src/strategies/information.rs index 8f1bfcc..e28951d 100644 --- a/src/strategies/information.rs +++ b/src/strategies/information.rs @@ -1,5 +1,6 @@ use fnv::{FnvHashMap, FnvHashSet}; use std::cmp::Ordering; +use float_ord::*; use strategy::*; use game::*; @@ -420,12 +421,6 @@ impl MyPublicInformation { }) } - fn knows_dead_card(&self, player: &Player) -> bool { - self.hand_info[player].iter().any(|table| { - table.probability_is_dead(&self.board) == 1.0 - }) - } - fn someone_else_needs_hint(&self, view: &OwnedGameView) -> bool { // Does another player have a playable card, but doesn't know it? view.get_other_players().iter().any(|player| { @@ -518,117 +513,86 @@ impl PublicInformation for MyPublicInformation { *self == other } - fn ask_questions( + fn ask_question( &self, - me: &Player, - hand_info: &mut HandInfo, - mut ask_question: Callback, - mut info_remaining: u32, - ) where Callback: FnMut(&mut HandInfo, &mut u32, Box) { + _me: &Player, + hand_info: &HandInfo, + total_info: u32, + ) -> Option> { // Changing anything inside this function will not break the information transfer // mechanisms! 
- let compute_augmented_hand_info = |hand_info: &HandInfo| { - hand_info.iter().cloned().enumerate() - .map(|(i, card_table)| { - let p_play = card_table.probability_is_playable(&self.board); - let p_dead = card_table.probability_is_dead(&self.board); - let is_determined = card_table.is_determined(); - (card_table, i, p_play, p_dead, is_determined) - }) - .collect::>() - }; + let augmented_hand_info_raw = hand_info.iter().cloned().enumerate().filter_map(|(i, card_table)| { + let p_play = card_table.probability_is_playable(&self.board); + let p_dead = card_table.probability_is_dead(&self.board); + Some((i, p_play, p_dead)) + }).collect::>(); + let know_playable_card = augmented_hand_info_raw.iter().any(|&(_, p_play, _)| p_play == 1.0); + let know_dead_card = augmented_hand_info_raw.iter().any(|&(_, _, p_dead)| p_dead == 1.0); - if !self.knows_playable_card(me) { // TODO: changing this to "if true {" slightly improves the three-player game and - // very slightly worsens the other cases. There probably is some - // other way to make this decision that's better in all cases. - let augmented_hand_info = compute_augmented_hand_info(hand_info); - let mut ask_play = augmented_hand_info.iter() - .filter(|&&(_, _, p_play, p_dead, is_determined)| { - if is_determined { return false; } - if p_dead == 1.0 { return false; } - if p_play == 1.0 || p_play < 0.2 { return false; } - true - }).collect::>(); - // sort by probability of play, then by index - ask_play.sort_by(|&&(_, i1, p1, _, _), &&(_, i2, p2, _, _)| { - // It's better to include higher-probability-of-playability - // cards into our combo question, since that maximizes our - // chance of finding out about a playable card. - let result = p2.partial_cmp(&p1); - if result == None || result == Some(Ordering::Equal) { - i1.cmp(&i2) - } else { - result.unwrap() - } - }); + // We don't need to find out anything about cards that are determined or dead. 
+ let augmented_hand_info = augmented_hand_info_raw.into_iter().filter(|&(i, _, p_dead)| { + if p_dead == 1.0 { false } + else if hand_info[i].is_determined() { false } + else { true } + }).collect::>(); - if self.board.num_players == 5 { - for &(_, i, _, _, _) in ask_play { - ask_question(hand_info, &mut info_remaining, Box::new(q_is_playable(i))); - if info_remaining <= 1 { return; } - } - } else { - let mut rest_combo = AdditiveComboQuestion {questions: Vec::new()}; - for &(_, i, _, _, _) in ask_play { - if rest_combo.info_amount() < info_remaining { - rest_combo.questions.push(Box::new(q_is_playable(i))); - } - } - rest_combo.questions.reverse(); // It's better to put lower-probability-of-playability - // cards first: The difference only matters if we - // find a playable card, and conditional on that, - // it's better to find out about as many non-playable - // cards as possible. - if rest_combo.info_amount() < info_remaining && !self.knows_dead_card(me) { - let mut ask_dead = augmented_hand_info.iter() - .filter(|&&(_, _, _, p_dead, _)| { - p_dead > 0.0 && p_dead < 1.0 - }).collect::>(); - // sort by probability of death, then by index - ask_dead.sort_by(|&&(_, i1, _, d1, _), &&(_, i2, _, d2, _)| { - let result = d2.partial_cmp(&d1); - if result == None || result == Some(Ordering::Equal) { - i1.cmp(&i2) - } else { - result.unwrap() - } - }); - for &(_, i, _, _, _) in ask_dead { - if rest_combo.info_amount() < info_remaining { - rest_combo.questions.push(Box::new(q_is_dead(i))); - } - } - } - ask_question(hand_info, &mut info_remaining, Box::new(rest_combo)); - if info_remaining <= 1 { return; } + if !know_playable_card { + // Vector of tuples (ask_dead, i, p_yes), where ask_dead=false means we'll + // ask if the card at i is playable, and ask_dead=true means we ask if the card at i is + // dead. p_yes is the probability the answer is nonzero. 
+ let mut to_ask: Vec<(bool, usize, f32)> = augmented_hand_info.iter().filter_map(|&(i, p_play, _)| { + if p_play == 0.0 { None } + else { Some((false, i, p_play)) } + }).collect(); + if !know_dead_card { + to_ask.extend(augmented_hand_info.iter().filter_map(|&(i, _, p_dead)| { + if p_dead == 0.0 { None } + else { Some((true, i, p_dead)) } + })); + } + + let combo_question_capacity = (total_info - 1) as usize; + if to_ask.len() > combo_question_capacity { + // The questions don't fit into an AdditiveComboQuestion. + // Sort by type (ask_dead=false first), then by p_yes (bigger first) + to_ask.sort_by_key(|&(ask_dead, _, p_yes)| {(ask_dead, FloatOrd(-p_yes))}); + to_ask.truncate(combo_question_capacity); + } + + // Sort by type (ask_dead=false first), then by p_yes (smaller first), since it's + // better to put lower-probability-of-playability/death cards first: The difference + // only matters if we find a playable/dead card, and conditional on that, it's better + // to find out about as many non-playable/non-dead cards as possible. + to_ask.sort_by_key(|&(ask_dead, _, p_yes)| {(ask_dead, FloatOrd(p_yes))}); + let questions = to_ask.into_iter().map(|(ask_dead, i, _)| -> Box { + if ask_dead { Box::new(q_is_dead(i)) } + else { Box::new(q_is_playable(i)) } + }).collect::>(); + if questions.len() > 0 { + return Some(Box::new(AdditiveComboQuestion { questions })) } } - // Recompute augmented_hand_info, incorporating the things we learned when asking questions - let augmented_hand_info = compute_augmented_hand_info(hand_info); - let mut ask_partition = augmented_hand_info.iter() - .filter(|&&(_, _, _, p_dead, is_determined)| { - if is_determined { return false } - // TODO: possibly still valuable to ask? 
- if p_dead == 1.0 { return false } - true - }).collect::>(); - // sort by probability of play, then by index - ask_partition.sort_by(|&&(_, i1, p1, _, _), &&(_, i2, p2, _, _)| { - // *higher* probabilities are better - let result = p2.partial_cmp(&p1); - if result == None || result == Some(Ordering::Equal) { - i1.cmp(&i2) - } else { - result.unwrap() - } - }); + let ask_play_score = |p_play: f32| FloatOrd((p_play-0.7).abs()); + let mut ask_play = augmented_hand_info.iter().filter(|&&(_, p_play, _)| { + ask_play_score(p_play) < FloatOrd(0.2) + }).cloned().collect::>(); + ask_play.sort_by_key(|&(i, p_play, _)| (ask_play_score(p_play), i)); + if let Some(&(i, _, _)) = ask_play.get(0) { + return Some(Box::new(q_is_playable(i))); + } - for &(ref card_table, i, _, _, _) in ask_partition { - let question = CardPossibilityPartition::new(i, info_remaining, &card_table, &self.board); - ask_question(hand_info, &mut info_remaining, Box::new(question)); - if info_remaining <= 1 { return; } + let mut ask_partition = augmented_hand_info; + // sort by probability of death (lowest first), then by index + ask_partition.sort_by_key(|&(i, _, p_death)| { + (FloatOrd(p_death), i) + }); + if let Some(&(i, _, _)) = ask_partition.get(0) { + let question = CardPossibilityPartition::new(i, total_info, &hand_info[i], &self.board); + Some(Box::new(question)) + } else { + None } } } @@ -814,24 +778,14 @@ impl InformationPlayerStrategy { // debug!("{}: {}", i, card_table); // } - let playable_cards = private_info.iter().enumerate().filter(|&(_, card_table)| { - card_table.probability_is_playable(&view.board) == 1.0 + // If possible, play the best playable card + // the higher the play_score, the better to play + let mut playable_cards = private_info.iter().enumerate().filter_map(|(i, card_table)| { + if card_table.probability_is_playable(&view.board) != 1.0 { return None; } + Some((i, self.get_average_play_score(view, card_table))) }).collect::>(); - - if playable_cards.len() > 0 { - // play 
the best playable card - // the higher the play_score, the better to play - let mut play_score = -1.0; - let mut play_index = 0; - - for (index, card_table) in playable_cards { - let score = self.get_average_play_score(view, card_table); - if score > play_score { - play_score = score; - play_index = index; - } - } - + playable_cards.sort_by_key(|&(i, play_score)| (FloatOrd(-play_score), i)); + if let Some(&(play_index, _)) = playable_cards.get(0) { return TurnChoice::Play(play_index) } @@ -901,23 +855,19 @@ impl InformationPlayerStrategy { return TurnChoice::Discard(useless_indices[0]); } - // Play the best discardable card - let mut compval = 0.0; - let mut index = 0; - for (i, card_table) in private_info.iter().enumerate() { + // Make the least risky discard. + let mut cards_by_discard_value = private_info.iter().enumerate().map(|(i, card_table)| { let probability_is_seen = card_table.probability_of_predicate(&|card| { view.can_see(card) }); - let my_compval = + let compval = 20.0 * probability_is_seen + 10.0 * card_table.probability_is_dispensable(&view.board) + card_table.average_value(); - - if my_compval > compval { - compval = my_compval; - index = i; - } - } + (i, compval) + }).collect::>(); + cards_by_discard_value.sort_by_key(|&(i, compval)| (FloatOrd(-compval), i)); + let (index, _) = cards_by_discard_value[0]; TurnChoice::Discard(index) }