From 9364a0686242cf1544ee2ad211da31c3d7cde554 Mon Sep 17 00:00:00 2001
From: Felix Bauckholt <felixbauckholt@gmail.com>
Date: Fri, 8 Mar 2019 20:59:14 +0100
Subject: [PATCH] Improve ask_question strategy

I also replaced the callback architecture behind
`PublicInformation.ask_questions()` with a simpler method
`PublicInformation.ask_question()` that gets called repeatedly.

To make all float-based sorts easier, I used the `float-ord` package.
I also used it to clean up some of the sorting in `decide_wrapped()`.
---
 Cargo.lock                    |   7 ++
 Cargo.toml                    |   1 +
 README.md                     |   4 +-
 src/main.rs                   |   1 +
 src/strategies/hat_helpers.rs |  62 ++++++----
 src/strategies/information.rs | 220 +++++++++++++---------------------
 6 files changed, 132 insertions(+), 163 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index bc376d2..e063b68 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,6 +3,11 @@ name = "crossbeam"
 version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
+[[package]]
+name = "float-ord"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [[package]]
 name = "fnv"
 version = "1.0.6"
@@ -39,6 +44,7 @@ name = "rust_hanabi"
 version = "0.1.0"
 dependencies = [
  "crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -47,6 +53,7 @@ dependencies = [
 
 [metadata]
 "checksum crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "348228ce9f93d20ffc30c18e575f82fa41b9c8bf064806c65d41eba4771595a0"
+"checksum float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bad48618fdb549078c333a7a8528acb57af271d0433bdecd523eb620628364e"
 "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
 "checksum getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9047cfbd08a437050b363d35ef160452c5fe8ea5187ae0a624708c91581d685"
 "checksum libc 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4870ef6725dde13394134e587e4ab4eca13cb92e916209a31c851b49131d3c75"
diff --git a/Cargo.toml b/Cargo.toml
index 85f0aae..79c0936 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,4 +8,5 @@ rand = "*"
 log = "*"
 getopts = "*"
 fnv = "*"
+float-ord = "*"
 crossbeam = "0.2.5"
diff --git a/README.md b/README.md
index baa7397..b5cd0f5 100644
--- a/README.md
+++ b/README.md
@@ -73,5 +73,5 @@ On the first 20000 seeds, we have these average scores and win rates:
 |---------|---------|---------|---------|---------|
 | cheat   | 24.8594 | 24.9785 | 24.9720 | 24.9557 |
 |         | 90.59 % | 98.17 % | 97.76 % | 96.42 % |
-| info    | 22.3736 | 24.7840 | 24.9261 | 24.9160 |
-|         | 10.41 % | 84.14 % | 94.33 % | 93.49 % |
+| info    | 22.5194 | 24.7942 | 24.9354 | 24.9220 |
+|         | 12.58 % | 84.46 % | 95.03 % | 94.01 % |
diff --git a/src/main.rs b/src/main.rs
index 12a05f7..86ff353 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,7 @@ extern crate log;
 extern crate rand;
 extern crate crossbeam;
 extern crate fnv;
+extern crate float_ord;
 
 mod helpers;
 mod game;
diff --git a/src/strategies/hat_helpers.rs b/src/strategies/hat_helpers.rs
index 5ba7c6f..7550a5b 100644
--- a/src/strategies/hat_helpers.rs
+++ b/src/strategies/hat_helpers.rs
@@ -117,18 +117,37 @@ pub trait PublicInformation: Clone {
 
     fn agrees_with(&self, other: Self) -> bool;
 
-    /// By defining `ask_questions`, we decides which `Question`s a player learns the answers to.
+    /// By defining `ask_question`, we decide which `Question`s a player learns the answers to.
     ///
-    /// A player "asks" a question by calling the callback. Questions can depend on the answers to
-    /// earlier questions: We are given a `&mut HandInfo<CardPossibilityTable>` that we'll have to pass
-    /// to that callback; there, it will be modified to reflect the answer to the question. Note that `self`
-    /// is not modified and thus reflects the state before any player "asked" any question.
+    /// Whenever we need to compute a "hat value", this method will be called repeatedly, either
+    /// until the information runs out, or until it returns `None`. These questions can depend on
+    /// the answers to earlier questions: We are given a `&HandInfo<CardPossibilityTable>` that
+    /// reflects the answers to previous questions for the same "hat value computation".
     ///
-    /// The product of the `info_amount()`s of all questions we have may not exceed `total_info`.
-    /// For convenience, we pass a `&mut u32` to the callback, and it will be updated to the
-    /// "remaining" information amount.
-    fn ask_questions<Callback>(&self, &Player, &mut HandInfo<CardPossibilityTable>, Callback, total_info: u32)
-        where Callback: FnMut(&mut HandInfo<CardPossibilityTable>, &mut u32, Box<Question>);
+    /// Note that `self` does not reflect the answers to previous questions; it reflects the state
+    /// before the entire "hat value" calculation.
+    fn ask_question(&self, &Player, &HandInfo<CardPossibilityTable>, total_info: u32) -> Option<Box<Question>>;
+
+    fn ask_question_wrapper(&self, player: &Player, hand_info: &HandInfo<CardPossibilityTable>, total_info: u32)
+        -> Option<Box<Question>>
+    {
+        assert!(total_info > 0);
+        if total_info == 1 {
+            None
+        } else {
+            let result = self.ask_question(player, hand_info, total_info);
+            if let Some(ref question) = result {
+                if question.info_amount() > total_info {
+                    panic!("ask_question returned question with info_amount = {} > total_info = {}!",
+                           question.info_amount(), total_info);
+                }
+                if question.info_amount() == 1 {
+                    panic!("ask_question returned a trivial question!");
+                }
+            }
+            result
+        }
+    }
 
     fn set_player_infos(&mut self, infos: Vec<(Player, HandInfo<CardPossibilityTable>)>) {
         for (player, new_hand_info) in infos {
@@ -142,14 +161,10 @@ pub trait PublicInformation: Clone {
     ) -> ModulusInformation {
         assert!(player != &view.player);
         let mut answer_info = ModulusInformation::none();
-        {
-            let callback = |hand_info: &mut HandInfo<CardPossibilityTable>, info_remaining: &mut u32, question: Box<Question>| {
-                let new_answer_info = question.answer_info(view.get_hand(player), view.get_board());
-                question.acknowledge_answer_info(new_answer_info.clone(), hand_info, view.get_board());
-                answer_info.combine(new_answer_info, total_info);
-                *info_remaining = answer_info.info_remaining(total_info);
-            };
-            self.ask_questions(player, hand_info, callback, total_info);
+        while let Some(question) = self.ask_question_wrapper(player, hand_info, answer_info.info_remaining(total_info)) {
+            let new_answer_info = question.answer_info(view.get_hand(player), view.get_board());
+            question.acknowledge_answer_info(new_answer_info.clone(), hand_info, view.get_board());
+            answer_info.combine(new_answer_info, total_info);
         }
         answer_info.cast_up(total_info);
         answer_info
@@ -162,14 +177,9 @@ pub trait PublicInformation: Clone {
         board: &BoardState,
         mut info: ModulusInformation,
     ) {
-        let total_info = info.modulus;
-        {
-            let callback = |hand_info: &mut HandInfo<CardPossibilityTable>, info_remaining: &mut u32, question: Box<Question>| {
-                let answer_info = info.split(question.info_amount());
-                question.acknowledge_answer_info(answer_info, hand_info, board);
-                *info_remaining = info.modulus;
-            };
-            self.ask_questions(player, hand_info, callback, total_info);
+        while let Some(question) = self.ask_question_wrapper(player, hand_info, info.modulus) {
+            let answer_info = info.split(question.info_amount());
+            question.acknowledge_answer_info(answer_info, hand_info, board);
         }
         assert!(info.value == 0);
     }
diff --git a/src/strategies/information.rs b/src/strategies/information.rs
index 8f1bfcc..e28951d 100644
--- a/src/strategies/information.rs
+++ b/src/strategies/information.rs
@@ -1,5 +1,6 @@
 use fnv::{FnvHashMap, FnvHashSet};
 use std::cmp::Ordering;
+use float_ord::*;
 
 use strategy::*;
 use game::*;
@@ -420,12 +421,6 @@ impl MyPublicInformation {
             })
     }
 
-    fn knows_dead_card(&self, player: &Player) -> bool {
-            self.hand_info[player].iter().any(|table| {
-                table.probability_is_dead(&self.board) == 1.0
-            })
-    }
-
     fn someone_else_needs_hint(&self, view: &OwnedGameView) -> bool {
         // Does another player have a playable card, but doesn't know it?
         view.get_other_players().iter().any(|player| {
@@ -518,117 +513,86 @@ impl PublicInformation for MyPublicInformation {
         *self == other
     }
 
-    fn ask_questions<Callback>(
+    fn ask_question(
         &self,
-        me: &Player,
-        hand_info: &mut HandInfo<CardPossibilityTable>,
-        mut ask_question: Callback,
-        mut info_remaining: u32,
-    ) where Callback: FnMut(&mut HandInfo<CardPossibilityTable>, &mut u32, Box<Question>) {
+        _me: &Player,
+        hand_info: &HandInfo<CardPossibilityTable>,
+        total_info: u32,
+    ) -> Option<Box<Question>> {
         // Changing anything inside this function will not break the information transfer
         // mechanisms!
 
-        let compute_augmented_hand_info = |hand_info: &HandInfo<CardPossibilityTable>| {
-            hand_info.iter().cloned().enumerate()
-            .map(|(i, card_table)| {
-                let p_play = card_table.probability_is_playable(&self.board);
-                let p_dead = card_table.probability_is_dead(&self.board);
-                let is_determined = card_table.is_determined();
-                (card_table, i, p_play, p_dead, is_determined)
-            })
-            .collect::<Vec<_>>()
-        };
+        let augmented_hand_info_raw = hand_info.iter().cloned().enumerate().filter_map(|(i, card_table)| {
+            let p_play = card_table.probability_is_playable(&self.board);
+            let p_dead = card_table.probability_is_dead(&self.board);
+            Some((i, p_play, p_dead))
+        }).collect::<Vec<_>>();
+        let know_playable_card = augmented_hand_info_raw.iter().any(|&(_, p_play, _)| p_play == 1.0);
+        let know_dead_card     = augmented_hand_info_raw.iter().any(|&(_, _, p_dead)| p_dead == 1.0);
 
-        if !self.knows_playable_card(me) { // TODO: changing this to "if true {" slightly improves the three-player game and
-                                           // very slightly worsens the other cases. There probably is some
-                                           // other way to make this decision that's better in all cases.
-            let augmented_hand_info = compute_augmented_hand_info(hand_info);
-            let mut ask_play = augmented_hand_info.iter()
-                .filter(|&&(_, _, p_play, p_dead, is_determined)| {
-                    if is_determined { return false; }
-                    if p_dead == 1.0  { return false; }
-                    if p_play == 1.0 || p_play < 0.2 { return false; }
-                    true
-                }).collect::<Vec<_>>();
-            // sort by probability of play, then by index
-            ask_play.sort_by(|&&(_, i1, p1, _, _), &&(_, i2, p2, _, _)| {
-                    // It's better to include higher-probability-of-playability
-                    // cards into our combo question, since that maximizes our
-                    // chance of finding out about a playable card.
-                    let result = p2.partial_cmp(&p1);
-                    if result == None || result == Some(Ordering::Equal) {
-                        i1.cmp(&i2)
-                    } else {
-                        result.unwrap()
-                    }
-            });
+        // We don't need to find out anything about cards that are determined or dead.
+        let augmented_hand_info = augmented_hand_info_raw.into_iter().filter(|&(i, _, p_dead)| {
+            if p_dead == 1.0 { false }
+            else if hand_info[i].is_determined() { false }
+            else { true }
+        }).collect::<Vec<_>>();
 
-            if self.board.num_players == 5 {
-                for &(_, i, _, _, _) in ask_play {
-                    ask_question(hand_info, &mut info_remaining, Box::new(q_is_playable(i)));
-                    if info_remaining <= 1 { return; }
-                }
-            } else {
-                let mut rest_combo = AdditiveComboQuestion {questions: Vec::new()};
-                for &(_, i, _, _, _) in ask_play {
-                    if rest_combo.info_amount() < info_remaining {
-                        rest_combo.questions.push(Box::new(q_is_playable(i)));
-                    }
-                }
-                rest_combo.questions.reverse(); // It's better to put lower-probability-of-playability
-                                                // cards first: The difference only matters if we
-                                                // find a playable card, and conditional on that,
-                                                // it's better to find out about as many non-playable
-                                                // cards as possible.
-                if rest_combo.info_amount() < info_remaining && !self.knows_dead_card(me) {
-                    let mut ask_dead = augmented_hand_info.iter()
-                        .filter(|&&(_, _, _, p_dead, _)| {
-                            p_dead > 0.0 && p_dead < 1.0
-                        }).collect::<Vec<_>>();
-                    // sort by probability of death, then by index
-                    ask_dead.sort_by(|&&(_, i1, _, d1, _), &&(_, i2, _, d2, _)| {
-                            let result = d2.partial_cmp(&d1);
-                            if result == None || result == Some(Ordering::Equal) {
-                                i1.cmp(&i2)
-                            } else {
-                                result.unwrap()
-                            }
-                    });
-                    for &(_, i, _, _, _) in ask_dead {
-                        if rest_combo.info_amount() < info_remaining {
-                            rest_combo.questions.push(Box::new(q_is_dead(i)));
-                        }
-                    }
-                }
-                ask_question(hand_info, &mut info_remaining, Box::new(rest_combo));
-                if info_remaining <= 1 { return; }
+        if !know_playable_card {
+            // Vector of tuples (ask_dead, i, p_yes), where ask_dead=false means we'll
+            // ask if the card at i is playable, and ask_dead=true means we ask if the card at i is
+            // dead. p_yes is the probability the answer is nonzero.
+            let mut to_ask: Vec<(bool, usize, f32)> = augmented_hand_info.iter().filter_map(|&(i, p_play, _)| {
+                if p_play == 0.0 { None }
+                else { Some((false, i, p_play)) }
+            }).collect();
+            if !know_dead_card {
+                to_ask.extend(augmented_hand_info.iter().filter_map(|&(i, _, p_dead)| {
+                    if p_dead == 0.0 { None }
+                    else { Some((true, i, p_dead)) }
+                }));
+            }
+
+            let combo_question_capacity = (total_info - 1) as usize;
+            if to_ask.len() > combo_question_capacity {
+                // The questions don't fit into an AdditiveComboQuestion.
+                // Sort by type (ask_dead=false first), then by p_yes (bigger first)
+                to_ask.sort_by_key(|&(ask_dead, _, p_yes)| {(ask_dead, FloatOrd(-p_yes))});
+                to_ask.truncate(combo_question_capacity);
+            }
+
+            // Sort by type (ask_dead=false first), then by p_yes (smaller first), since it's
+            // better to put lower-probability-of-playability/death cards first: The difference
+            // only matters if we find a playable/dead card, and conditional on that, it's better
+            // to find out about as many non-playable/non-dead cards as possible.
+            to_ask.sort_by_key(|&(ask_dead, _, p_yes)| {(ask_dead, FloatOrd(p_yes))});
+            let questions = to_ask.into_iter().map(|(ask_dead, i, _)| -> Box<Question> {
+                if ask_dead { Box::new(q_is_dead(i)) }
+                else        { Box::new(q_is_playable(i)) }
+            }).collect::<Vec<_>>();
+            if questions.len() > 0 {
+                return Some(Box::new(AdditiveComboQuestion { questions }))
             }
         }
 
-        // Recompute augmented_hand_info, incorporating the things we learned when asking questions
-        let augmented_hand_info = compute_augmented_hand_info(hand_info);
-        let mut ask_partition = augmented_hand_info.iter()
-            .filter(|&&(_, _, _, p_dead, is_determined)| {
-                if is_determined { return false }
-                // TODO: possibly still valuable to ask?
-                if p_dead == 1.0 { return false }
-                true
-            }).collect::<Vec<_>>();
-        // sort by probability of play, then by index
-        ask_partition.sort_by(|&&(_, i1, p1, _, _), &&(_, i2, p2, _, _)| {
-                // *higher* probabilities are better
-                let result = p2.partial_cmp(&p1);
-                if result == None || result == Some(Ordering::Equal) {
-                    i1.cmp(&i2)
-                } else {
-                    result.unwrap()
-                }
-        });
+        let ask_play_score = |p_play: f32| FloatOrd((p_play-0.7).abs());
+        let mut ask_play = augmented_hand_info.iter().filter(|&&(_, p_play, _)| {
+            ask_play_score(p_play) < FloatOrd(0.2)
+        }).cloned().collect::<Vec<_>>();
+        ask_play.sort_by_key(|&(i, p_play, _)| (ask_play_score(p_play), i));
+        if let Some(&(i, _, _)) = ask_play.get(0) {
+            return Some(Box::new(q_is_playable(i)));
+        }
 
-        for &(ref card_table, i, _, _, _) in ask_partition {
-            let question = CardPossibilityPartition::new(i, info_remaining, &card_table, &self.board);
-            ask_question(hand_info, &mut info_remaining, Box::new(question));
-            if info_remaining <= 1 { return; }
+        let mut ask_partition = augmented_hand_info;
+        // sort by probability of death (lowest first), then by index
+        ask_partition.sort_by_key(|&(i, _, p_death)| {
+            (FloatOrd(p_death), i)
+        });
+        if let Some(&(i, _, _)) = ask_partition.get(0) {
+            let question = CardPossibilityPartition::new(i, total_info, &hand_info[i], &self.board);
+            Some(Box::new(question))
+        } else {
+            None
         }
     }
 }
@@ -814,24 +778,14 @@ impl InformationPlayerStrategy {
         //     debug!("{}: {}", i, card_table);
         // }
 
-        let playable_cards = private_info.iter().enumerate().filter(|&(_, card_table)| {
-            card_table.probability_is_playable(&view.board) == 1.0
+        // If possible, play the best playable card
+        // the higher the play_score, the better to play
+        let mut playable_cards = private_info.iter().enumerate().filter_map(|(i, card_table)| {
+            if card_table.probability_is_playable(&view.board) != 1.0 { return None; }
+            Some((i, self.get_average_play_score(view, card_table)))
         }).collect::<Vec<_>>();
-
-        if playable_cards.len() > 0 {
-            // play the best playable card
-            // the higher the play_score, the better to play
-            let mut play_score = -1.0;
-            let mut play_index = 0;
-
-            for (index, card_table) in playable_cards {
-                let score = self.get_average_play_score(view, card_table);
-                if score > play_score {
-                    play_score = score;
-                    play_index = index;
-                }
-            }
-
+        playable_cards.sort_by_key(|&(i, play_score)| (FloatOrd(-play_score), i));
+        if let Some(&(play_index, _)) = playable_cards.get(0) {
             return TurnChoice::Play(play_index)
         }
 
@@ -901,23 +855,19 @@ impl InformationPlayerStrategy {
             return TurnChoice::Discard(useless_indices[0]);
         }
 
-        // Play the best discardable card
-        let mut compval = 0.0;
-        let mut index = 0;
-        for (i, card_table) in private_info.iter().enumerate() {
+        // Make the least risky discard.
+        let mut cards_by_discard_value = private_info.iter().enumerate().map(|(i, card_table)| {
             let probability_is_seen = card_table.probability_of_predicate(&|card| {
                 view.can_see(card)
             });
-            let my_compval =
+            let compval =
                 20.0 * probability_is_seen
                 + 10.0 * card_table.probability_is_dispensable(&view.board)
                 + card_table.average_value();
-
-            if my_compval > compval {
-                compval = my_compval;
-                index = i;
-            }
-        }
+            (i, compval)
+        }).collect::<Vec<_>>();
+        cards_by_discard_value.sort_by_key(|&(i, compval)| (FloatOrd(-compval), i));
+        let (index, _) = cards_by_discard_value[0];
         TurnChoice::Discard(index)
     }