Merge pull request #7 from felixbauckholt/information-efficiency

Refactoring to be able to improve information efficiency
2019-03-20 02:35:57 +01:00 · 2019-03-20 02:35:57 +01:00 · 7f384cc15d
commit 7f384cc15d
parent c791adcb5a 1c229227ab
8 changed files with 829 additions and 752 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3,6 +3,11 @@ name = "crossbeam"
 version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"

+[[package]]
+name = "float-ord"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [[package]]
 name = "fnv"
 version = "1.0.6"
@ -39,6 +44,7 @@ name = "rust_hanabi"
 version = "0.1.0"
 dependencies = [
 "crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
 "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
 "log 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
@ -47,6 +53,7 @@ dependencies = [

 [metadata]
 "checksum crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "348228ce9f93d20ffc30c18e575f82fa41b9c8bf064806c65d41eba4771595a0"
+"checksum float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bad48618fdb549078c333a7a8528acb57af271d0433bdecd523eb620628364e"
 "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
 "checksum getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9047cfbd08a437050b363d35ef160452c5fe8ea5187ae0a624708c91581d685"
 "checksum libc 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4870ef6725dde13394134e587e4ab4eca13cb92e916209a31c851b49131d3c75"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -8,4 +8,5 @@ rand = "*"
 log = "*"
 getopts = "*"
 fnv = "*"
+float-ord = "*"
 crossbeam = "0.2.5"
--- a/README.md
+++ b/README.md
@ -73,5 +73,5 @@ On the first 20000 seeds, we have these scores and win rates (average ± standar
 |---------|------------------|------------------|------------------|------------------|
 | cheat   | 24.8594 ± 0.0036 | 24.9785 ± 0.0012 | 24.9720 ± 0.0014 | 24.9557 ± 0.0018 |
 |         | 90.59 ± 0.21 % | 98.17 ± 0.09 % | 97.76 ± 0.10 % | 96.42 ± 0.13 % |
-| info    | 22.3249 ± 0.0128 | 24.7278 ± 0.0046 | 24.8919 ± 0.0029 | 24.8961 ± 0.0027 |
-|         | 09.81 ± 0.21 % | 80.54 ± 0.28 % | 91.67 ± 0.20 % | 91.90 ± 0.19 % |
+| info    | 22.5194 ± 0.0125 | 24.7942 ± 0.0039 | 24.9354 ± 0.0022 | 24.9220 ± 0.0024 |
+|         | 12.58 ± 0.23 % | 84.46 ± 0.26 % | 95.03 ± 0.15 % | 94.01 ± 0.17 % |
--- a/src/game.rs
+++ b/src/game.rs
@ -44,7 +44,7 @@ impl fmt::Debug for Card {
    }
 }

-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct CardCounts {
    counts: FnvHashMap<Card, u32>,
 }
@ -99,7 +99,7 @@ impl fmt::Display for CardCounts {

 pub type Cards = Vec<Card>;

-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct Discard {
    pub cards: Cards,
    counts: CardCounts,
@ -137,7 +137,7 @@ impl fmt::Display for Discard {
 pub type Score = u32;
 pub const PERFECT_SCORE: Score = (NUM_COLORS * NUM_VALUES) as u32;

-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct Firework {
    pub color: Color,
    pub top: Value,
@ -198,14 +198,14 @@ impl fmt::Display for Hinted {
    }
 }

-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct Hint {
    pub player: Player,
    pub hinted: Hinted,
 }

 // represents the choice a player made in a given turn
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub enum TurnChoice {
    Hint(Hint),
    Discard(usize), // index of card to discard
@ -213,7 +213,7 @@ pub enum TurnChoice {
 }

 // represents what happened in a turn
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub enum TurnResult {
    Hint(Vec<bool>),  // vector of whether each was in the hint
    Discard(Card),    // card discarded
@ -221,7 +221,7 @@ pub enum TurnResult {
 }

 // represents a turn taken in the game
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct TurnRecord {
    pub player: Player,
    pub choice: TurnChoice,
@ -243,7 +243,7 @@ pub struct GameOptions {

 // State of everything except the player's hands
 // Is all completely common knowledge
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct BoardState {
    pub deck_size: u32,
    pub total_cards: u32,
--- a/src/helpers.rs
+++ b/src/helpers.rs
@ -246,7 +246,7 @@ impl fmt::Display for SimpleCardInfo {
 // Can represent information of the form:
 // this card is/isn't possible
 // also, maintains integer weights for the cards
-#[derive(Clone,Debug)]
+#[derive(Clone,Debug,Eq,PartialEq)]
 pub struct CardPossibilityTable {
    possible: HashMap<Card, u32>,
 }
@ -369,7 +369,7 @@ impl fmt::Display for CardPossibilityTable {
    }
 }

-#[derive(Clone)]
+#[derive(Clone,Eq,PartialEq)]
 pub struct HandInfo<T> where T: CardInfo {
    pub hand_info: Vec<T>
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -4,6 +4,7 @@ extern crate log;
 extern crate rand;
 extern crate crossbeam;
 extern crate fnv;
+extern crate float_ord;

 mod helpers;
 mod game;
@ -12,6 +13,7 @@ mod strategy;
 mod strategies {
    pub mod examples;
    pub mod cheating;
+    mod hat_helpers;
    pub mod information;
 }

--- a/src/strategies/hat_helpers.rs
+++ b/src/strategies/hat_helpers.rs
@ -0,0 +1,243 @@
+use game::*;
+use helpers::*;
+
+#[derive(Debug,Clone)]
+pub struct ModulusInformation { // a number `value` together with the modulus it is counted under
+    pub modulus: u32, // exclusive upper bound for `value`; `new` asserts `value < modulus`
+    pub value: u32,
+}
+impl ModulusInformation {
+    pub fn new(modulus: u32, value: u32) -> Self {
+        assert!(value < modulus); // panics on an out-of-range value; this also rejects `modulus == 0`
+        ModulusInformation {
+            modulus: modulus,
+            value: value,
+        }
+    }
+    /// The trivial piece of information: modulus 1, value 0.
+    pub fn none() -> Self {
+        Self::new(1, 0)
+    }
+    /// Stacks `other` on top of `self` (new value = `self.value + self.modulus * other.value`); the modulus is capped at `max_modulus`.
+    pub fn combine(&mut self, other: Self, max_modulus: u32) {
+        assert!(other.modulus <= self.info_remaining(max_modulus)); // `other` must fit in the capacity that is left
+        self.value = self.value + self.modulus * other.value;
+        self.modulus = std::cmp::min(max_modulus, self.modulus * other.modulus);
+        assert!(self.value < self.modulus);
+    }
+    /// Largest modulus of an `other` that `combine(other, max_modulus)` can still accept.
+    pub fn info_remaining(&self, max_modulus: u32) -> u32 {
+        // We want to find the largest number `result` such that
+        // `self.combine(other, max_modulus)` works whenever `other.modulus == result`.
+        // `other.value` can be up to `result - 1`, so calling combine could increase our value to
+        // up to `self.value + self.modulus * (result - 1)`, which must always be less than
+        // `max_modulus`.
+        // Therefore, we compute the largest number `result` such that
+        // `self.value + self.modulus * (result - 1) < max_modulus`.
+        let result = (max_modulus - self.value - 1) / self.modulus + 1; // NOTE(review): the u32 subtraction underflows if `self.value >= max_modulus` — callers presumably guarantee otherwise
+        assert!(self.value + self.modulus * (result - 1) < max_modulus);
+        assert!(self.value + self.modulus * ((result + 1) - 1) >= max_modulus);
+        result
+    }
+    /// Removes the low part of `self` (its value modulo `modulus`) and returns it; `self` keeps the quotient.
+    pub fn split(&mut self, modulus: u32) -> Self {
+        assert!(self.modulus >= modulus); // NOTE(review): does not exclude `modulus == 0`, which would panic in the `%` below
+        let original_modulus = self.modulus;
+        let original_value = self.value;
+        let value = self.value % modulus;
+        self.value = self.value / modulus;
+        // `self.modulus` is the largest number such that
+        // `value + (self.modulus - 1) * modulus < original_modulus`.
+        // TODO: find an explanation of why this makes everything work out
+        self.modulus = (original_modulus - value - 1) / modulus + 1;
+        assert!(original_value == value + modulus * self.value); // sanity check: the two parts recombine to the original value
+        Self::new(modulus, value)
+    }
+    /// Reinterprets the value under a modulus that is at least as large; the value itself is unchanged.
+    pub fn cast_up(&mut self, modulus: u32) {
+        assert!(self.modulus <= modulus);
+        self.modulus = modulus;
+    }
+
+    // pub fn cast_down(&mut self, modulus: u32) {
+    //     assert!(self.modulus >= modulus);
+    //     assert!(self.value < modulus);
+    //     self.modulus = modulus;
+    // }
+    /// Adds `other.value` modulo the shared modulus; panics unless both operands have the same modulus.
+    pub fn add(&mut self, other: &Self) {
+        assert!(self.modulus == other.modulus);
+        self.value = (self.value + other.value) % self.modulus;
+    }
+    /// Counterpart of `add`: subtracts `other.value` modulo the shared modulus.
+    pub fn subtract(&mut self, other: &Self) {
+        assert!(self.modulus == other.modulus);
+        self.value = (self.modulus + self.value - other.value) % self.modulus; // adding the modulus first avoids u32 underflow when `other.value > self.value` (assuming `other.value < modulus`)
+    }
+}
+
+pub trait Question { // a query about a hand whose answer is a number below `info_amount()`
+    // how much info does this question ask for? (used as the modulus of the answer in `answer_info`)
+    fn info_amount(&self) -> u32;
+    // get the answer to this question, given cards; must be less than `info_amount()`, or `answer_info` panics
+    fn answer(&self, &Cards, &BoardState) -> u32;
+    // process the answer to this question, updating card info
+    fn acknowledge_answer(
+        &self, value: u32, &mut HandInfo<CardPossibilityTable>, &BoardState
+    );
+    // wraps `answer` as a `ModulusInformation` whose modulus is `info_amount()`
+    fn answer_info(&self, hand: &Cards, board: &BoardState) -> ModulusInformation {
+        ModulusInformation::new(
+            self.info_amount(),
+            self.answer(hand, board)
+        )
+    }
+    // like `acknowledge_answer`, but first asserts that the answer's modulus matches this question
+    fn acknowledge_answer_info(
+        &self,
+        answer: ModulusInformation,
+        hand_info: &mut HandInfo<CardPossibilityTable>,
+        board: &BoardState
+    ) {
+        assert!(self.info_amount() == answer.modulus);
+        self.acknowledge_answer(answer.value, hand_info, board);
+    }
+}
+
+pub trait PublicInformation: Clone {
+    fn get_player_info(&self, &Player) -> HandInfo<CardPossibilityTable>;
+    fn set_player_info(&mut self, &Player, HandInfo<CardPossibilityTable>);
+
+    fn new(&BoardState) -> Self;
+    fn set_board(&mut self, &BoardState);
+
+    /// If we store more state than just `HandInfo<CardPossibilityTable>`s, update it after `set_player_info` has been called.
+    fn update_other_info(&mut self) {
+    }
+    /// NOTE(review): the contract of `agrees_with` is not visible here; presumably it checks two states for consistency — confirm.
+    fn agrees_with(&self, other: Self) -> bool;
+
+    /// By defining `ask_question`, we decide which `Question`s a player learns the answers to.
+    ///
+    /// Whenever we need to compute a "hat value", this method will be called repeatedly, either
+    /// until the information runs out, or until it returns `None`. These questions can depend on
+    /// the answers to earlier questions: We are given a `&HandInfo<CardPossibilityTable>` that
+    /// reflects the answers of previous questions for the same "hat value computation".
+    ///
+    /// Note that `self` does not reflect the answers to previous questions; it reflects the state
+    /// before the entire "hat value" calculation.
+    fn ask_question(&self, &Player, &HandInfo<CardPossibilityTable>, total_info: u32) -> Option<Box<Question>>;
+    /// Calls `ask_question` unless `total_info == 1`; panics if the returned question is trivial or asks for more than `total_info`.
+    fn ask_question_wrapper(&self, player: &Player, hand_info: &HandInfo<CardPossibilityTable>, total_info: u32)
+        -> Option<Box<Question>>
+    {
+        assert!(total_info > 0);
+        if total_info == 1 {
+            None
+        } else {
+            let result = self.ask_question(player, hand_info, total_info);
+            if let Some(ref question) = result {
+                if question.info_amount() > total_info {
+                    panic!("ask_question returned question with info_amount = {} > total_info = {}!",
+                           question.info_amount(), total_info);
+                }
+                if question.info_amount() == 1 {
+                    panic!("ask_question returned a trivial question!");
+                }
+            }
+            result
+        }
+    }
+    /// Stores every `(player, hand info)` pair via `set_player_info`, then calls `update_other_info` once.
+    fn set_player_infos(&mut self, infos: Vec<(Player, HandInfo<CardPossibilityTable>)>) {
+        for (player, new_hand_info) in infos {
+            self.set_player_info(&player, new_hand_info);
+        }
+        self.update_other_info();
+    }
+    /// Asks questions about `player`'s hand as seen in `view`, updates `hand_info` with each answer, and packs the answers into one value modulo `total_info`.
+    fn get_hat_info_for_player(
+        &self, player: &Player, hand_info: &mut HandInfo<CardPossibilityTable>, total_info: u32, view: &OwnedGameView
+    ) -> ModulusInformation {
+        assert!(player != &view.player); // presumably because `view` cannot see its owner's hand — confirm
+        let mut answer_info = ModulusInformation::none();
+        while let Some(question) = self.ask_question_wrapper(player, hand_info, answer_info.info_remaining(total_info)) {
+            let new_answer_info = question.answer_info(view.get_hand(player), view.get_board());
+            question.acknowledge_answer_info(new_answer_info.clone(), hand_info, view.get_board());
+            answer_info.combine(new_answer_info, total_info);
+        }
+        answer_info.cast_up(total_info);
+        answer_info
+    }
+    /// Decodes `info` by asking questions again and splitting off one answer per question; mirrors `get_hat_info_for_player`.
+    fn update_from_hat_info_for_player(
+        &self,
+        player: &Player,
+        hand_info: &mut HandInfo<CardPossibilityTable>,
+        board: &BoardState,
+        mut info: ModulusInformation,
+    ) {
+        while let Some(question) = self.ask_question_wrapper(player, hand_info, info.modulus) {
+            let answer_info = info.split(question.info_amount());
+            question.acknowledge_answer_info(answer_info, hand_info, board);
+        }
+        assert!(info.value == 0); // everything that was encoded must have been consumed
+    }
+
+    /// When deciding on a move, if we can choose between `total_info` choices,
+    /// `self.get_hat_sum(total_info, view)` tells us which choice to take, and at the same time
+    /// mutates `self` to simulate the choice becoming common knowledge.
+    fn get_hat_sum(&mut self, total_info: u32, view: &OwnedGameView) -> ModulusInformation {
+        let (infos, new_player_hands): (Vec<_>, Vec<_>) = view.get_other_players().iter().map(|player| {
+            let mut hand_info = self.get_player_info(player);
+            let info = self.get_hat_info_for_player(player, &mut hand_info, total_info, view);
+            (info, (player.clone(), hand_info))
+        }).unzip();
+        self.set_player_infos(new_player_hands);
+        infos.into_iter().fold(
+            ModulusInformation::new(total_info, 0),
+            |mut sum_info, info| {
+                sum_info.add(&info);
+                sum_info
+            }
+        )
+    }
+
+    /// When updating on a move, if we infer that the player making the move called `get_hat_sum()`
+    /// and got the result `info`, we can call `self.update_from_hat_sum(info, view)` to update
+    /// from that fact.
+    fn update_from_hat_sum(&mut self, mut info: ModulusInformation, view: &OwnedGameView) {
+        let info_source = view.board.player; // NOTE(review): assumed to be the player who made the move — confirm
+        let (other_infos, mut new_player_hands): (Vec<_>, Vec<_>) = view.get_other_players().into_iter().filter(|player| {
+            *player != info_source
+        }).map(|player| {
+            let mut hand_info = self.get_player_info(&player);
+            let player_info = self.get_hat_info_for_player(&player, &mut hand_info, info.modulus, view);
+            (player_info, (player.clone(), hand_info))
+        }).unzip();
+        for other_info in other_infos {
+            info.subtract(&other_info); // remove the contributions we can compute ourselves
+        }
+        let me = view.player;
+        if me == info_source {
+            assert!(info.value == 0); // presumably the source summed nothing for its own hand, so nothing should remain
+        } else {
+            let mut my_hand = self.get_player_info(&me);
+            self.update_from_hat_info_for_player(&me, &mut my_hand, &view.board, info);
+            new_player_hands.push((me, my_hand));
+        }
+        self.set_player_infos(new_player_hands);
+    }
+    /// Starts from the stored info about `view.player`'s own hand and, per card slot, calls `decrement_weight_if_possible` for every card in the other hands.
+    fn get_private_info(&self, view: &OwnedGameView) -> HandInfo<CardPossibilityTable> {
+        let mut info = self.get_player_info(&view.player);
+        for card_table in info.iter_mut() {
+            for (_, hand) in &view.other_hands {
+                for card in hand {
+                    card_table.decrement_weight_if_possible(card);
+                }
+            }
+        }
+        info
+    }
+}
--- a/src/strategies/information.rs
+++ b/src/strategies/information.rs