From a427a2575e9db889afb2d56711e49e8eb7e61ed4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Ke=C3=9Fler?= <git@maximilian-kessler.de>
Date: Thu, 25 May 2023 17:00:18 +0200
Subject: [PATCH] start impl of better greedy solver

---
 greedy_solver.py | 105 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 100 insertions(+), 5 deletions(-)

diff --git a/greedy_solver.py b/greedy_solver.py
index 45678f5..448aa0c 100755
--- a/greedy_solver.py
+++ b/greedy_solver.py
@@ -3,6 +3,7 @@ import collections
 import sys
 from enum import Enum
 from log_setup import logger
+from typing import Tuple, List, Optional
 from time import sleep
 
 from hanabi import DeckCard, Action, ActionType, GameState, HanabiInstance
@@ -14,7 +15,8 @@ class CardType(Enum):
     Trash = 0
     Playable = 1
     Critical = 2
-    Dispensable = 3
+    DuplicateVisible = 3
+    UniqueVisible = 4
 
 
 class CardState():
@@ -31,8 +33,10 @@ class CardState():
                 return "Playable ({}) with weight {}".format(self.card, self.weight)
             case CardType.Critical:
                 return "Critical ({})".format(self.card)
-            case CardType.Dispensable:
-                return "Dispensable ({}) with weight {}".format(self.card, self.weight)
+            case CardType.DuplicateVisible:
+                return "Useful (duplicate visible) ({}) with weight {}".format(self.card, self.weight)
+            case CardType.UniqueVisible:
+                return "Useful (unique visible) ({}) with weight {}".format(self.card, self.weight)
 
 
 # TODO
@@ -45,7 +49,92 @@ def card_type(game_state, card):
     elif card.rank == 5 or card in game_state.trash:
         return CardType.Critical
     else:
-        return CardType.Dispensable
+        visible_cards = sum((game_state.hands[player] for player in range(game_state.num_players)), [])
+        if visible_cards.count(card) >= 2:
+            return CardType.DuplicateVisible
+        else:
+            return CardType.UniqueVisible
+
+
+class WeightedCard:  # Pairs a card with an optional integer weight (None until one is assigned).
+    def __init__(self, card, weight: Optional[int] = None):
+        self.weight = weight
+        self.card = card
+
+    def __repr__(self):  # rendered as "<card> with weight <weight>"
+        return f"{self.card} with weight {self.weight}"
+
+
+class HandState:  # Sorts one player's hand into buckets by card_type and picks the cheapest discard.
+    def __init__(self, player: int, game_state: GameState):  # player indexes game_state.hands
+        self.trash = []  # Trash cards plus extra in-hand copies; every bucket holds WeightedCard
+        self.playable = []  # at most one entry per distinct playable card
+        self.critical = []
+        self.dupes = []  # DuplicateVisible cards with no second copy found in this hand yet
+        self.uniques = []
+        for card in game_state.hands[player]:
+            match card_type(game_state, card):
+                case CardType.Trash:
+                    self.trash.append(WeightedCard(card))
+                case CardType.Playable:
+                    if card not in (w.card for w in self.playable):
+                        self.playable.append(WeightedCard(card))
+                    else:  # second copy of an already-listed playable card
+                        self.trash.append(WeightedCard(card))  # was a raw card: keep bucket types uniform
+                case CardType.Critical:
+                    self.critical.append(WeightedCard(card))
+                case CardType.UniqueVisible:
+                    self.uniques.append(WeightedCard(card))
+                case CardType.DuplicateVisible:
+                    copy = next((w for w in self.dupes if w.card == card), None)
+                    if copy is not None:  # both copies are in this hand: keep one, trash the other
+                        self.dupes.remove(copy)
+                        self.critical.append(copy)
+                        self.trash.append(WeightedCard(card))  # was a raw card: keep bucket types uniform
+                    else:
+                        self.dupes.append(WeightedCard(card))
+        self.playable.sort(key=lambda c: c.card.rank)  # ascending rank, so [-1] is the highest rank
+        self.dupes.sort(key=lambda c: c.card.rank)
+        self.uniques.sort(key=lambda c: c.card.rank)
+        if self.trash:  # discard preference: trash, dupes, uniques, playable, critical
+            self.best_discard = self.trash[0]
+            self.discard_badness = 0
+        elif self.dupes:  # lowest-rank DuplicateVisible card
+            self.best_discard = self.dupes[0]
+            self.discard_badness = 8 - game_state.num_players
+        elif self.uniques:  # highest-rank UniqueVisible card
+            self.best_discard = self.uniques[-1]
+            self.discard_badness = 80 - 10 * self.best_discard.card.rank
+        elif self.playable:  # highest-rank playable card
+            self.best_discard = self.playable[-1]
+            self.discard_badness = 80 - 10 * self.best_discard.card.rank
+        else:  # only critical cards are left; critical is unsorted, so [-1] is the last one added
+            assert len(self.critical) > 0, "Programming error."
+            self.best_discard = self.critical[-1]
+            self.discard_badness = 600 - 100*self.best_discard.card.rank
+
+    def num_useful_cards(self):  # count of cards in every bucket except trash
+        return len(self.dupes) + len(self.uniques) + len(self.playable) + len(self.critical)
+
+
+class CheatingStrategy:  # Work-in-progress strategy that inspects every player's hand via HandState.
+    def __init__(self, game_state: GameState):
+        self.game_state = game_state
+
+    def make_move(self):  # currently only dumps debug state and terminates the process
+        hand_states = [HandState(player, self.game_state) for player in range(self.game_state.num_players)]
+
+        modified_pace = self.game_state.pace - sum(  # pace minus one per hand that is entirely trash
+            1 for state in hand_states if len(state.trash) == self.game_state.hand_size
+        )  # NOTE(review): modified_pace is computed but not used yet
+
+        cur_hand = hand_states[self.game_state.turn]  # NOTE(review): not used yet
+
+        print([state.__dict__ for state in hand_states])
+        print(self.game_state.pace)
+        sys.exit(0)  # sys.exit instead of the site-injected exit(), which is meant for interactive use
+
+
 
 
 class GreedyStrategy():
@@ -157,7 +246,7 @@ class GreedyStrategy():
 
 def run_deck(instance: HanabiInstance) -> GameState:
     gs = GameState(instance)
-    strat = GreedyStrategy(gs)
+    strat = CheatingStrategy(gs)  # NOTE(review): CheatingStrategy.make_move currently prints debug state and exits the process
     while not gs.is_over():
         strat.make_move()
     return gs
@@ -190,3 +279,9 @@ def run_samples(num_players, sample_size):
     logger.info("Won {} ({}%) and lost {} ({}%) from sample of {} test games using greedy strategy.".format(
         won, round(100 * won / sample_size, 2), lost, round(100 * lost / sample_size, 2), sample_size
     ))
+
+
+if __name__ == "__main__":  # script entry point; the first CLI argument is the sample size
+    for num_players in range(2, 6):  # player counts 2 through 5
+        run_samples(num_players, int(sys.argv[1]))
+        print()  # blank line between the runs for each player count