Merge pull request #7 from felixbauckholt/information-efficiency

Refactoring to be able to improve information efficiency
2019-03-20 02:35:57 +01:00 · 2019-03-20 02:35:57 +01:00 · 7f384cc15d
commit 7f384cc15d
parent c791adcb5a 1c229227ab
8 changed files with 829 additions and 752 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3,6 +3,11 @@ name = "crossbeam"
 version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 [[package]]
 name = "float-ord"
 version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 [[package]]
 name = "fnv"
 version = "1.0.6"
@ -39,6 +44,7 @@ name = "rust_hanabi"
 version = "0.1.0"
 dependencies = [
 "crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 "float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
 "getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
 "log 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
@ -47,6 +53,7 @@ dependencies = [
 [metadata]
 "checksum crossbeam 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "348228ce9f93d20ffc30c18e575f82fa41b9c8bf064806c65d41eba4771595a0"
 "checksum float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bad48618fdb549078c333a7a8528acb57af271d0433bdecd523eb620628364e"
 "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
 "checksum getopts 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9047cfbd08a437050b363d35ef160452c5fe8ea5187ae0a624708c91581d685"
 "checksum libc 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4870ef6725dde13394134e587e4ab4eca13cb92e916209a31c851b49131d3c75"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -8,4 +8,5 @@ rand = "*"
 log = "*"
 getopts = "*"
 fnv = "*"
 float-ord = "*"
 crossbeam = "0.2.5"
--- a/README.md
+++ b/README.md
@ -73,5 +73,5 @@ On the first 20000 seeds, we have these scores and win rates (average ± standar
 |---------|------------------|------------------|------------------|------------------|
 | cheat   | 24.8594 ± 0.0036 | 24.9785 ± 0.0012 | 24.9720 ± 0.0014 | 24.9557 ± 0.0018 |
 |         | 90.59 ± 0.21 % | 98.17 ± 0.09 % | 97.76 ± 0.10 % | 96.42 ± 0.13 % |
-| info    | 22.3249 ± 0.0128 | 24.7278 ± 0.0046 | 24.8919 ± 0.0029 | 24.8961 ± 0.0027 |
+| info    | 22.5194 ± 0.0125 | 24.7942 ± 0.0039 | 24.9354 ± 0.0022 | 24.9220 ± 0.0024 |
-|         | 09.81 ± 0.21 % | 80.54 ± 0.28 % | 91.67 ± 0.20 % | 91.90 ± 0.19 % |
+|         | 12.58 ± 0.23 % | 84.46 ± 0.26 % | 95.03 ± 0.15 % | 94.01 ± 0.17 % |
--- a/src/game.rs
+++ b/src/game.rs
@ -44,7 +44,7 @@ impl fmt::Debug for Card {
    }
 }
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct CardCounts {
    // Maps each `Card` to a `u32` count.
    // NOTE(review): what exactly is being tallied is not visible in this hunk -- confirm
    // against the `impl`.
    counts: FnvHashMap<Card, u32>,
 }
@ -99,7 +99,7 @@ impl fmt::Display for CardCounts {
 pub type Cards = Vec<Card>;
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct Discard {
    pub cards: Cards,
    counts: CardCounts,
@ -137,7 +137,7 @@ impl fmt::Display for Discard {
 pub type Score = u32;
 pub const PERFECT_SCORE: Score = (NUM_COLORS * NUM_VALUES) as u32;
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct Firework {
    pub color: Color,
    pub top: Value,
@ -198,14 +198,14 @@ impl fmt::Display for Hinted {
    }
 }
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct Hint {
    // NOTE(review): presumably the player the hint is addressed to -- confirm against callers.
    pub player: Player,
    // What the hint points out; see `Hinted`.
    pub hinted: Hinted,
 }
 // represents the choice a player made in a given turn
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub enum TurnChoice {
    Hint(Hint),
    Discard(usize), // index of card to discard
@ -213,7 +213,7 @@ pub enum TurnChoice {
 }
 // represents what happened in a turn
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub enum TurnResult {
    Hint(Vec<bool>),  // vector of whether each was in the hint
    Discard(Card),    // card discarded
@ -221,7 +221,7 @@ pub enum TurnResult {
 }
 // represents a turn taken in the game
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct TurnRecord {
    pub player: Player,
    pub choice: TurnChoice,
@ -243,7 +243,7 @@ pub struct GameOptions {
 // State of everything except the player's hands
 // Is all completely common knowledge
-#[derive(Debug,Clone)]
+#[derive(Debug,Clone,Eq,PartialEq)]
 pub struct BoardState {
    pub deck_size: u32,
    pub total_cards: u32,
--- a/src/helpers.rs
+++ b/src/helpers.rs
@ -246,7 +246,7 @@ impl fmt::Display for SimpleCardInfo {
 // Can represent information of the form:
 // this card is/isn't possible
 // also, maintains integer weights for the cards
-#[derive(Clone,Debug)]
+#[derive(Clone,Debug,Eq,PartialEq)]
 pub struct CardPossibilityTable {
    // Integer weight stored per `Card`.
    // NOTE(review): presumably only cards that are still possible appear as keys -- confirm
    // against the `impl`.
    possible: HashMap<Card, u32>,
 }
@ -369,7 +369,7 @@ impl fmt::Display for CardPossibilityTable {
    }
 }
-#[derive(Clone)]
+#[derive(Clone,Eq,PartialEq)]
 pub struct HandInfo<T> where T: CardInfo {
    // One `T` (some `CardInfo` implementation) per entry.
    // NOTE(review): presumably one entry per card in the hand, in hand order -- confirm.
    pub hand_info: Vec<T>
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -4,6 +4,7 @@ extern crate log;
 extern crate rand;
 extern crate crossbeam;
 extern crate fnv;
 extern crate float_ord;
 mod helpers;
 mod game;
@ -12,6 +13,7 @@ mod strategy;
 mod strategies {
    pub mod examples;
    pub mod cheating;
    mod hat_helpers;
    pub mod information;
 }
--- a/src/strategies/hat_helpers.rs
+++ b/src/strategies/hat_helpers.rs
@ -0,0 +1,243 @@
 use game::*;
 use helpers::*;
 /// A number `value` that is known to lie in `0..modulus`.
 ///
 /// Think of it as one "digit" in base `modulus`. Several digits can be packed into a single
 /// larger value with `combine` and unpacked again, in the same order, with `split`.
 #[derive(Debug,Clone)]
 pub struct ModulusInformation {
    /// Number of distinct values that `value` can take.
    pub modulus: u32,
    /// The encoded value; the constructor asserts `value < modulus`.
    pub value: u32,
 }
 impl ModulusInformation {
    /// Builds a new piece of information.
    ///
    /// # Panics
    /// Panics unless `value < modulus`.
    pub fn new(modulus: u32, value: u32) -> Self {
        assert!(value < modulus);
        ModulusInformation {
            value: value,
            modulus: modulus,
        }
    }

    /// The empty piece of information: a single possible value (`modulus == 1`, `value == 0`).
    pub fn none() -> Self {
        ModulusInformation::new(1, 0)
    }

    /// Packs `other` on top of `self` as the next, more significant digit.
    ///
    /// The resulting modulus is capped at `max_modulus`.
    ///
    /// # Panics
    /// Panics if `other.modulus` exceeds `self.info_remaining(max_modulus)`.
    pub fn combine(&mut self, other: Self, max_modulus: u32) {
        assert!(other.modulus <= self.info_remaining(max_modulus));
        // The new digit is scaled by the modulus accumulated so far.
        self.value += self.modulus * other.value;
        let widened = self.modulus * other.modulus;
        self.modulus = if widened < max_modulus { widened } else { max_modulus };
        assert!(self.value < self.modulus);
    }

    /// Returns the largest modulus that can still be `combine`d into `self` without the
    /// packed value reaching `max_modulus`.
    pub fn info_remaining(&self, max_modulus: u32) -> u32 {
        // We are looking for the largest `result` such that `self.combine(other, max_modulus)`
        // succeeds for every `other` with `other.modulus == result`. Since `other.value` can be
        // as large as `result - 1`, the combined value can reach
        // `self.value + self.modulus * (result - 1)`, and that must stay below `max_modulus`.
        let headroom = max_modulus - self.value - 1;
        let result = headroom / self.modulus + 1;
        // `result` fits ...
        assert!(self.value + self.modulus * (result - 1) < max_modulus);
        // ... and `result + 1` would not.
        assert!(self.value + self.modulus * ((result + 1) - 1) >= max_modulus);
        result
    }

    /// Removes the least significant digit of size `modulus` from `self` and returns it.
    ///
    /// This undoes `combine`: digits come out in the order in which they were packed.
    ///
    /// # Panics
    /// Panics if `self.modulus < modulus`.
    pub fn split(&mut self, modulus: u32) -> Self {
        assert!(self.modulus >= modulus);
        let original_modulus = self.modulus;
        let original_value = self.value;
        let value = original_value % modulus;
        self.value = original_value / modulus;
        // The new `self.modulus` is the largest number such that
        // `value + (self.modulus - 1) * modulus < original_modulus`.
        // NOTE(review): this is the expression `info_remaining(original_modulus)` evaluates
        // for an accumulator holding just the digit (`modulus`, `value`). A decoder calling
        // `split` is therefore left with the same remaining capacity that an encoder calling
        // `combine` saw after packing that digit, which keeps both sides in step.
        self.modulus = (original_modulus - value - 1) / modulus + 1;
        assert!(original_value == value + modulus * self.value);
        ModulusInformation::new(modulus, value)
    }

    /// Reinterprets the value under a larger (or equal) modulus; the value itself is untouched.
    ///
    /// # Panics
    /// Panics if `modulus` is smaller than the current modulus.
    pub fn cast_up(&mut self, modulus: u32) {
        assert!(self.modulus <= modulus);
        self.modulus = modulus;
    }

    // pub fn cast_down(&mut self, modulus: u32) {
    //     assert!(self.modulus >= modulus);
    //     assert!(self.value < modulus);
    //     self.modulus = modulus;
    // }

    /// Adds `other` to `self`, modulo the shared modulus.
    ///
    /// # Panics
    /// Panics if the two moduli differ.
    pub fn add(&mut self, other: &Self) {
        assert!(self.modulus == other.modulus);
        let total = self.value + other.value;
        self.value = total % self.modulus;
    }

    /// Subtracts `other` from `self`, modulo the shared modulus.
    ///
    /// # Panics
    /// Panics if the two moduli differ.
    pub fn subtract(&mut self, other: &Self) {
        assert!(self.modulus == other.modulus);
        // Adding the modulus first keeps the unsigned intermediate from underflowing.
        let shifted = self.modulus + self.value - other.value;
        self.value = shifted % self.modulus;
    }
 }
 /// A question about a hand whose answer is a number in `0..info_amount()`.
 ///
 /// Implementors supply `info_amount`, `answer` and `acknowledge_answer`; the `*_info`
 /// methods wrap the raw answer in a `ModulusInformation` whose modulus is `info_amount()`.
 pub trait Question {
    // how much info does this question ask for?
    fn info_amount(&self) -> u32;
    // get the answer to this question, given cards
    fn answer(&self, hand: &Cards, board: &BoardState) -> u32;
    // process the answer to this question, updating card info
    fn acknowledge_answer(
        &self, value: u32, hand_info: &mut HandInfo<CardPossibilityTable>, board: &BoardState
    );

    /// Answers the question for `hand` and packages the result as a `ModulusInformation`.
    ///
    /// `ModulusInformation::new` asserts that the answer is smaller than `info_amount()`.
    fn answer_info(&self, hand: &Cards, board: &BoardState) -> ModulusInformation {
        ModulusInformation::new(
            self.info_amount(),
            self.answer(hand, board)
        )
    }

    /// Applies an answer previously produced by `answer_info` to `hand_info`.
    ///
    /// # Panics
    /// Panics if `answer.modulus` differs from `info_amount()`.
    fn acknowledge_answer_info(
        &self,
        answer: ModulusInformation,
        hand_info: &mut HandInfo<CardPossibilityTable>,
        board: &BoardState
    ) {
        assert!(self.info_amount() == answer.modulus);
        self.acknowledge_answer(answer.value, hand_info, board);
    }
 }
 pub trait PublicInformation: Clone {
    fn get_player_info(&self, &Player) -> HandInfo<CardPossibilityTable>;
    fn set_player_info(&mut self, &Player, HandInfo<CardPossibilityTable>);
    fn new(&BoardState) -> Self;
    fn set_board(&mut self, &BoardState);
    /// If we store more state than just `HandInfo<CardPossibilityTable>`s, update it after `set_player_info` has been called.
    fn update_other_info(&mut self) {
    }
    fn agrees_with(&self, other: Self) -> bool;
    /// By defining `ask_question`, we decides which `Question`s a player learns the answers to.
    ///
    /// Whenever we need to compute a "hat value", this method will be called repeatedly, either
    /// until the information runs out, or until it returns `None`. These questions can depend on
    /// the answers to earlier questions: We are given a `&HandInfo<CardPossibilityTable>` that
    /// reflect the answers of previous questions for the same "hat value computation".
    ///
    /// Note that `self` does not reflect the answers to previous questions; it reflects the state
    /// before the entire "hat value" calculation.
    fn ask_question(&self, &Player, &HandInfo<CardPossibilityTable>, total_info: u32) -> Option<Box<Question>>;
    fn ask_question_wrapper(&self, player: &Player, hand_info: &HandInfo<CardPossibilityTable>, total_info: u32)
        -> Option<Box<Question>>
    {
        assert!(total_info > 0);
        if total_info == 1 {
            None
        } else {
            let result = self.ask_question(player, hand_info, total_info);
            if let Some(ref question) = result {
                if question.info_amount() > total_info {
                    panic!("ask_question returned question with info_amount = {} > total_info = {}!",
                           question.info_amount(), total_info);
                }
                if question.info_amount() == 1 {
                    panic!("ask_question returned a trivial question!");
                }
            }
            result
        }
    }
    fn set_player_infos(&mut self, infos: Vec<(Player, HandInfo<CardPossibilityTable>)>) {
        for (player, new_hand_info) in infos {
            self.set_player_info(&player, new_hand_info);
        }
        self.update_other_info();
    }
    fn get_hat_info_for_player(
        &self, player: &Player, hand_info: &mut HandInfo<CardPossibilityTable>, total_info: u32, view: &OwnedGameView
    ) -> ModulusInformation {
        assert!(player != &view.player);
        let mut answer_info = ModulusInformation::none();
        while let Some(question) = self.ask_question_wrapper(player, hand_info, answer_info.info_remaining(total_info)) {
            let new_answer_info = question.answer_info(view.get_hand(player), view.get_board());
            question.acknowledge_answer_info(new_answer_info.clone(), hand_info, view.get_board());
            answer_info.combine(new_answer_info, total_info);
        }
        answer_info.cast_up(total_info);
        answer_info
    }
    fn update_from_hat_info_for_player(
        &self,
        player: &Player,
        hand_info: &mut HandInfo<CardPossibilityTable>,
        board: &BoardState,
        mut info: ModulusInformation,
    ) {
        while let Some(question) = self.ask_question_wrapper(player, hand_info, info.modulus) {
            let answer_info = info.split(question.info_amount());
            question.acknowledge_answer_info(answer_info, hand_info, board);
        }
        assert!(info.value == 0);
    }
    /// When deciding on a move, if we can choose between `total_info` choices,
    /// `self.get_hat_sum(total_info, view)` tells us which choice to take, and at the same time
    /// mutates `self` to simulate the choice becoming common knowledge.
    fn get_hat_sum(&mut self, total_info: u32, view: &OwnedGameView) -> ModulusInformation {
        let (infos, new_player_hands): (Vec<_>, Vec<_>) = view.get_other_players().iter().map(|player| {
            let mut hand_info = self.get_player_info(player);
            let info = self.get_hat_info_for_player(player, &mut hand_info, total_info, view);
            (info, (player.clone(), hand_info))
        }).unzip();
        self.set_player_infos(new_player_hands);
        infos.into_iter().fold(
            ModulusInformation::new(total_info, 0),
            |mut sum_info, info| {
                sum_info.add(&info);
                sum_info
            }
        )
    }
    /// When updating on a move, if we infer that the player making the move called `get_hat_sum()`
    /// and got the result `info`, we can call `self.update_from_hat_sum(info, view)` to update
    /// from that fact.
    fn update_from_hat_sum(&mut self, mut info: ModulusInformation, view: &OwnedGameView) {
        let info_source = view.board.player;
        let (other_infos, mut new_player_hands): (Vec<_>, Vec<_>) = view.get_other_players().into_iter().filter(|player| {
            *player != info_source
        }).map(|player| {
            let mut hand_info = self.get_player_info(&player);
            let player_info = self.get_hat_info_for_player(&player, &mut hand_info, info.modulus, view);
            (player_info, (player.clone(), hand_info))
        }).unzip();
        for other_info in other_infos {
            info.subtract(&other_info);
        }
        let me = view.player;
        if me == info_source {
            assert!(info.value == 0);
        } else {
            let mut my_hand = self.get_player_info(&me);
            self.update_from_hat_info_for_player(&me, &mut my_hand, &view.board, info);
            new_player_hands.push((me, my_hand));
        }
        self.set_player_infos(new_player_hands);
    }
    fn get_private_info(&self, view: &OwnedGameView) -> HandInfo<CardPossibilityTable> {
        let mut info = self.get_player_info(&view.player);
        for card_table in info.iter_mut() {
            for (_, hand) in &view.other_hands {
                for card in hand {
                    card_table.decrement_weight_if_possible(card);
                }
            }
        }
        info
    }
 }
--- a/src/strategies/information.rs
+++ b/src/strategies/information.rs