Py-Hanabi/greedy_solver.py

#! /bin/python3
import collections
import sys
from enum import Enum
from time import sleep

from hanabi import DeckCard, Action, ActionType, GameState, HanabiInstance
from compress import link, decompress_deck
from database.database import conn


class CardType(Enum):
    """Classification of a card relative to the stacks and the trash pile.

    The initial classification is computed by ``card_type``.
    """

    # rank is not above what has already been played on the card's suit
    Trash = 0
    # rank is exactly one above what has been played on the card's suit
    Playable = 1
    # not yet playable and either a 5 or a card of which a copy is already in the trash
    Critical = 2
    # not yet playable and none of the above
    Dispensable = 3


class CardState:
    """A card bundled with its current classification and a heuristic weight."""

    def __init__(self, card_type: CardType, card: DeckCard, weight=1):
        self.weight = weight
        self.card = card
        self.card_type = card_type

    def __repr__(self):
        # Only playable and dispensable cards carry a weight worth showing
        kind = self.card_type
        if kind == CardType.Trash:
            return f"Trash ({self.card})"
        if kind == CardType.Playable:
            return f"Playable ({self.card}) with weight {self.weight}"
        if kind == CardType.Critical:
            return f"Critical ({self.card})"
        if kind == CardType.Dispensable:
            return f"Dispensable ({self.card}) with weight {self.weight}"

# TODO
def card_type(game_state, card):
    """Classify ``card`` against the stacks and trash of ``game_state``.

    Returns a ``CardType``: Trash if the card's rank is already covered by its
    suit's stack, Playable if it is the next rank, Critical if it is a 5 or a
    copy of it is in ``game_state.trash``, and Dispensable otherwise.
    """
    top = game_state.stacks[card.suitIndex]
    if card.rank <= top:
        return CardType.Trash
    if card.rank == top + 1:
        return CardType.Playable
    must_keep = card.rank == 5 or card in game_state.trash
    return CardType.Critical if must_keep else CardType.Dispensable


class GreedyStrategy():
    """Greedy player that advances a ``GameState`` by one action per ``make_move`` call.

    Every call classifies all cards in all hands, assigns weights to playable
    and dispensable cards, and then plays, discards or clues on behalf of the
    player whose turn it is (or flags the state as lost when only critical
    cards could be discarded).
    """

    def __init__(self, game_state: GameState):
        """Store ``game_state`` and precompute per-suit draw statistics from its deck."""
        self.game_state = game_state

        # earliest_draw_times[s][r - 1] is the deck position of the first (s, r) card, shifted by the
        # number of initially dealt cards, but never smaller than the value of the previous rank
        # (0 for rank 1)
        self.earliest_draw_times = []
        for s in range(0, game_state.instance.num_suits):
            self.earliest_draw_times.append([])
            for r in range(1, 6):
                self.earliest_draw_times[s].append(max(
                        game_state.deck.index(DeckCard(s, r)) - game_state.hand_size * game_state.num_players + 1,
                        0 if r == 1 else self.earliest_draw_times[s][r - 2]
                ))

        # Currently, we do not add the time the 5 gets drawn to this, since this is rather a measurement of how
        # bad a suit is in terms of having to hold on to other cards that are not playable *yet*
        self.suit_badness = [sum(times[:-1]) for times in self.earliest_draw_times]

    @staticmethod
    def _mark_duplicates(hand_states):
        """Within each hand, keep one copy of every duplicated card and mark the other copies as trash."""
        for states in hand_states:
            counter = collections.Counter(state.card for state in states)
            for card, count in counter.items():
                if count < 2:
                    continue
                dupes = (cstate for cstate in states if cstate.card == card)
                first = next(dupes)
                # the other copies in this hand become trash, so the kept copy must not be discarded as well
                if first.card_type == CardType.Dispensable:
                    first.card_type = CardType.Critical
                for dupe in dupes:
                    dupe.card_type = CardType.Trash

    def _player_distance(self, f, t):
        """Number of turns (1..num_players) from player ``f`` until player ``t`` is to move."""
        return ((t - f - 1) % self.game_state.num_players) + 1

    def _playable_weight(self, player, card):
        """Weight of the playable ``card`` held by ``player``; a higher weight is played first."""
        copy_holders = set(self.game_state.holding_players(card))
        copy_holders.remove(player)
        connecting_holders = set(self.game_state.holding_players(DeckCard(card.suitIndex, card.rank + 1)))

        if len(copy_holders) == 0:
            # card is unique, importance is based lexicographically on whether somebody has the
            # connecting card and on the rank
            return (6 if len(connecting_holders) > 0 else 1) * (6 - card.rank)

        # a copy is available somewhere else
        if len(connecting_holders) == 0:
            # card is not urgent
            return 0.5 * (6 - card.rank)

        # there is a copy and there is a connecting card. check if they are out of order
        turns_to_copy = min(self._player_distance(player, holder) for holder in copy_holders)
        turns_to_conn = max(self._player_distance(player, holder) for holder in connecting_holders)
        if turns_to_copy < turns_to_conn:
            # our copy is not necessary for the connecting card to be able to play
            return 0.5 * (6 - card.rank)
        # our copy is important, scale it a little less than if it were unique
        return 4 * (6 - card.rank)

    def _dispensable_weight(self, card):
        """Weight of the dispensable ``card``; the lowest weight is discarded first."""
        # TODO: consider duplicate in hand
        # presumably game_state.progress is the deck index of the next card to be drawn -- verify
        remaining = self.game_state.deck[self.game_state.progress:]
        try:
            # look up the card being weighted itself (not a variable left over from another loop)
            next_copy = remaining.index(card)
        except ValueError:
            # no further copy of this card among the remaining deck cards
            next_copy = 1
#        return self.suit_badness[card.suitIndex] * next_copy + 2 * (5 - card.rank)
        return next_copy + 2 * (5 - card.rank)

    def make_move(self):
        """Perform one action on ``self.game_state`` for the player whose turn it is.

        Preference order: play the highest-weighted playable card; clue when at
        8 clues; discard a trash card; at 0 clues discard the lowest-weighted
        dispensable card (or set ``in_lost_state`` if there is none); otherwise clue.
        """
        hand_states = [
            [CardState(card_type(self.game_state, card), card, None) for card in self.game_state.hands[p]]
            for p in range(self.game_state.num_players)
        ]

        # find dupes in players' hands, mark one card crit and the other ones trash
        self._mark_duplicates(hand_states)

        for (player, states) in enumerate(hand_states):
            for state in states:
                if state.card_type == CardType.Playable:
                    state.weight = self._playable_weight(player, state.card)
                elif state.card_type == CardType.Dispensable:
                    state.weight = self._dispensable_weight(state.card)

        cur_hand = hand_states[self.game_state.turn]
        plays = [cstate for cstate in cur_hand if cstate.card_type == CardType.Playable]
        trash = next((cstate for cstate in cur_hand if cstate.card_type == CardType.Trash), None)

        # actual decision on what to do

        if len(plays) > 0:
            play = max(plays, key=lambda s: s.weight)
            self.game_state.play(play.card.deck_index)
        elif self.game_state.clues == 8:
            self.game_state.clue()
        elif trash is not None:
            self.game_state.discard(trash.card.deck_index)
        elif self.game_state.clues == 0:
            dispensable = [cstate for cstate in cur_hand if cstate.card_type == CardType.Dispensable]
            if len(dispensable) == 0:
                # only critical cards are left to discard: flag the state instead of raising
                self.game_state.in_lost_state = True
            else:
                discard = min(dispensable, key=lambda s: s.weight)
                self.game_state.discard(discard.card.deck_index)
        else:
            self.game_state.clue()

def test():
    """Play one hard-coded deck with the greedy strategy and print the actions and a replay link."""
    # seed p4v0s148
    deck = decompress_deck("15wpspaodknlftabkpixbxiudqvrumhsgeakqucvgcrfmfhynwlj")
    # GameState is built from a HanabiInstance, the same way run_deck does it; GreedyStrategy
    # reads game_state.instance, so the old two-argument call cannot work anymore
    gs = GameState(HanabiInstance(deck, 5))
    print(gs.deck)

    strat = GreedyStrategy(gs)
    while not gs.is_over():
        strat.make_move()
    print(gs.actions)
    # NOTE(review): aligned with run_deck, which passes the game state itself to link() -- confirm
    print(link(gs))


# wins = open("won_seeds.txt", "a")
# losses = open("lost_seeds.txt", "a")
# crits = open("crits_lost.txt", "a")


def run_deck(seed, num_players, deck_str):
    """Play the given deck with the greedy strategy and report whether it reached score 25.

    ``deck_str`` is decoded with ``decompress_deck``. A game that does not end
    with a score of 25 is appended to ``lost_seeds.txt`` together with its
    replay link.

    Returns True if the game was won, False otherwise.
    """
    deck = decompress_deck(deck_str)
    instance = HanabiInstance(deck, num_players)
    gs = GameState(instance)
    strat = GreedyStrategy(gs)
    while not gs.is_over():
        strat.make_move()
    if gs.score == 25:
        return True
    # The log file is no longer opened at import time, so open it on demand; this also
    # avoids creating the file when no game is lost
    with open("lost_seeds.txt", "a", encoding="utf-8") as losses:
        losses.write("{}-player seed {:10} {}:\n{}\n".format(num_players, seed, str(deck), link(gs)))
    return False


def run_samples(num_players, sample_size):
    """Run the greedy strategy on up to ``sample_size`` stored seeds and print the win rate.

    Seeds are read from the ``seeds`` table (``variant_id = 0``, matching
    ``num_players``, highest seeds first). A running won/lost counter is
    printed while the games are played, followed by the overall percentage.
    """
    won = 0
    lost = 0
    cur = conn.cursor()
    try:
        cur.execute("SELECT seed, num_players, deck FROM seeds WHERE variant_id = 0 AND num_players = (%s) order by seed desc limit (%s)", (num_players, sample_size))
        for r in cur:
            succ = run_deck(*r)
            if succ:
                won += 1
            else:
                lost += 1
            print("won: {:4}, lost: {:4}".format(won, lost), end = "\r")
    finally:
        # release the cursor even if a game raises
        cur.close()
    print()
    total = won + lost
    if total == 0:
        # nothing was played, so a percentage would divide by zero
        print("No seeds found for {} players".format(num_players))
        return
    print("Total wins: {}%".format(round(100 * won / total, 2)))

if __name__ == "__main__":
    # The sample size per player count is the only command line argument
    if len(sys.argv) < 2:
        sys.exit("usage: {} SAMPLE_SIZE".format(sys.argv[0]))
    # convert once, so the SQL LIMIT parameter is a number rather than a string
    sample_size = int(sys.argv[1])
    for p in range(2, 6):
        print("Testing on {} players...".format(p))
        run_samples(p, sample_size)
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00			`#! /bin/python3`
implement handling of dupes in same players hands 2023-03-14 18:15:15 +01:00			`import collections`
refactor greedy solver for new hanabi classes 2023-03-18 10:17:24 +01:00			`import sys`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`from enum import Enum`
			`from time import sleep`
add GameState class to model progress of hanabi game 2023-03-14 09:04:08 +01:00
refactor greedy solver for new hanabi classes 2023-03-18 10:17:24 +01:00			`from hanabi import DeckCard, Action, ActionType, GameState, HanabiInstance`
			`from compress import link, decompress_deck`
adapt to database package. remove creating of files upon import 2023-05-13 23:25:04 +02:00			`from database.database import conn`
add GameState class to model progress of hanabi game 2023-03-14 09:04:08 +01:00

first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`class CardType(Enum):`
			`Trash = 0`
			`Playable = 1`
			`Critical = 2`
			`Dispensable = 3`


			`class CardState():`
			`def __init__(self, card_type: CardType, card: DeckCard, weight=1):`
			`self.card_type = card_type`
			`self.card = card`
			`self.weight = weight`

			`def __repr__(self):`
			`match self.card_type:`
			`case CardType.Trash:`
			`return "Trash ({})".format(self.card)`
			`case CardType.Playable:`
			`return "Playable ({}) with weight {}".format(self.card, self.weight)`
			`case CardType.Critical:`
			`return "Critical ({})".format(self.card)`
			`case CardType.Dispensable:`
			`return "Dispensable ({}) with weight {}".format(self.card, self.weight)`

refactor greedy solver for new hanabi classes 2023-03-18 10:17:24 +01:00			`# TODO`
			`def card_type(game_state, card):`
			`played = game_state.stacks[card.suitIndex]`
			`if card.rank <= played:`
			`return CardType.Trash`
			`elif card.rank == played + 1:`
			`return CardType.Playable`
			`elif card.rank == 5 or card in game_state.trash:`
			`return CardType.Critical`
			`else:`
			`return CardType.Dispensable`
add GameState class to model progress of hanabi game 2023-03-14 09:04:08 +01:00
greedy solver: do not crash when crit is lost, just mark this 2023-03-15 15:44:03 +01:00
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`class GreedyStrategy():`
			`def __init__(self, game_state: GameState):`
			`self.game_state = game_state`

			`self.earliest_draw_times = []`
refactor greedy solver for new hanabi classes 2023-03-18 10:17:24 +01:00			`for s in range(0, game_state.instance.num_suits):`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`self.earliest_draw_times.append([])`
			`for r in range(1, 6):`
			`self.earliest_draw_times[s].append(max(`
			`game_state.deck.index(DeckCard(s,r)) - game_state.hand_size * game_state.num_players + 1,`
			`0 if r == 1 else self.earliest_draw_times[s][r - 2]`
			`))`

			`# Currently, we do not add the time the 5 gets drawn to this, since this is rather a measurument on how`
			`# bad a suit is in terms of having to hold on to other cards that are not playable yet`
			`self.suit_badness = [sum(self.earliest_draw_times[s][:-1]) for s in range(0, game_state.num_suits)]`

			`def make_move(self):`
refactor greedy solver for new hanabi classes 2023-03-18 10:17:24 +01:00			`hand_states = [[CardState(card_type(self.game_state, card), card, None) for card in self.game_state.hands[p]] for p in range(self.game_state.num_players)]`
implement handling of dupes in same players hands 2023-03-14 18:15:15 +01:00
			`# find dupes in players hands, marke one card crit and the other one trash`
compute winning percentage at end 2023-03-14 18:27:29 +01:00			`p = False`
implement handling of dupes in same players hands 2023-03-14 18:15:15 +01:00			`for states in hand_states:`
			`counter = collections.Counter(map(lambda state: state.card, states))`
			`for card in counter:`
			`if counter[card] >= 2:`
compute winning percentage at end 2023-03-14 18:27:29 +01:00			`dupes = (cstate for cstate in states if cstate.card == card)`
			`first = next(dupes)`
			`if first.card_type == CardType.Dispensable:`
			`first.card_type = CardType.Critical`
			`for dupe in dupes:`
			`dupe.card_type = CardType.Trash`
implement handling of dupes in same players hands 2023-03-14 18:15:15 +01:00
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00			`def hand_badness(states):`
			`if any(state.card_type == CardType.Playable for state in states):`
			`return 0`
			`crits = [state for state in states if state.card_type == CardType.Critical]`
			`crits_val = sum(map(lambda state: state.card.rank, crits))`
			`if any(state.card_type == CardType.Playable for state in states):`
			`return crits_val`

			`def player_distance(f, t):`
			`return ((t - f - 1) % self.game_state.num_players) + 1`

first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`for (player, states) in enumerate(hand_states):`
			`for state in states:`
			`if state.card_type == CardType.Playable:`
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00			`copy_holders = set(self.game_state.holding_players(state.card))`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`copy_holders.remove(player)`
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00			`connecting_holders = set(self.game_state.holding_players(DeckCard(state.card.suitIndex, state.card.rank + 1)))`

first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`if len(copy_holders) == 0:`
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00			`# card is unique, imortancy is based lexicographically on whether somebody has the conn. card and the rank`
			`state.weight = (6 if len(connecting_holders) > 0 else 1) * (6 - state.card.rank)`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`else:`
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00			`# copy is available somewhere else`
			`if len(connecting_holders) == 0:`
			`# card is not urgent`
			`state.weight = 0.5 * (6 - state.card.rank)`
			`else:`
			`# there is a copy and there is a connecting card. check if they are out of order`
			`turns_to_copy = min(map(lambda holder: player_distance(player, holder), copy_holders))`
			`turns_to_conn = max(map(lambda holder: player_distance(player, holder), connecting_holders))`
			`if turns_to_copy < turns_to_conn:`
			`# our copy is not neccessary for connecting card to be able to play`
			`state.weight = 0.5 * (6 - state.card.rank)`
			`else:`
			`# our copy is important, scale it little less than if it were unique`
			`state.weight = 4 * (6 - state.card.rank)`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`elif state.card_type == CardType.Dispensable:`
			`try:`
			`# TODO: consider duplicate in hand`
			`copy_holders = list(self.game_state.holding_players(state.card))`
			`copy_holders.remove(player)`
			`nextCopy = self.game_state.deck[self.game_state.progress:].index(card)`
			`except:`
			`nextCopy = 1`
implement handling of dupes in same players hands 2023-03-14 18:15:15 +01:00			`# state.weight = self.suit_badness[state.card.suitIndex] * nextCopy + 2 * (5 - state.card.rank)`
			`state.weight = nextCopy + 2 * (5 - state.card.rank)`
compute winning percentage at end 2023-03-14 18:27:29 +01:00

first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`cur_hand = hand_states[self.game_state.turn]`
			`plays = [cstate for cstate in cur_hand if cstate.card_type == CardType.Playable]`
			`trash = next((cstate for cstate in cur_hand if cstate.card_type == CardType.Trash), None)`

fix bug in replay creation: equal cards with different deck idx: replace correct one 2023-03-14 18:46:45 +01:00
			`# actual decision on what to do`

first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`if len(plays) > 0:`
			`play = max(plays, key=lambda s: s.weight)`
			`self.game_state.play(play.card.deck_index)`
			`elif self.game_state.clues == 8:`
			`self.game_state.clue()`
			`elif trash is not None:`
			`self.game_state.discard(trash.card.deck_index)`
			`elif self.game_state.clues == 0:`
			`dispensable = [cstate for cstate in cur_hand if cstate.card_type == CardType.Dispensable]`
			`if len(dispensable) == 0:`
greedy solver: do not crash when crit is lost, just mark this 2023-03-15 15:44:03 +01:00			`self.game_state.in_lost_state = True`
			`# raise ValueError("Lost critical card")`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`else:`
			`discard = min(dispensable, key=lambda s: s.weight)`
			`self.game_state.discard(discard.card.deck_index)`
			`else:`
			`self.game_state.clue()`

add GameState class to model progress of hanabi game 2023-03-14 09:04:08 +01:00			`def test():`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`# seed p4v0s148`
			`deck = decompress_deck("15wpspaodknlftabkpixbxiudqvrumhsgeakqucvgcrfmfhynwlj")`
add GameState class to model progress of hanabi game 2023-03-14 09:04:08 +01:00			`gs = GameState(5, deck)`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`print(gs.deck)`
add GameState class to model progress of hanabi game 2023-03-14 09:04:08 +01:00
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`strat = GreedyStrategy(gs)`
			`while not gs.is_over():`
			`strat.make_move()`
			`# print(strat.suit_badness)`
			`# print(COLORS)`
			`# strat.make_move()`
			`print(gs.actions)`
			`print(link(gs.to_json()))`


adapt to database package. remove creating of files upon import 2023-05-13 23:25:04 +02:00			`# wins = open("won_seeds.txt", "a")`
			`# losses = open("lost_seeds.txt", "a")`
			`# crits = open("crits_lost.txt", "a")`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00

			`def run_deck(seed, num_players, deck_str):`
			`deck = decompress_deck(deck_str)`
refactor greedy solver for new hanabi classes 2023-03-18 10:17:24 +01:00			`instance = HanabiInstance(deck, num_players)`
			`gs = GameState(instance)`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`strat = GreedyStrategy(gs)`
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00			`while not gs.is_over():`
			`strat.make_move()`
			`if not gs.score == 25:`
refactor greedy solver for new hanabi classes 2023-03-18 10:17:24 +01:00			`losses.write("{}-player seed {:10} {}:\n{}\n".format(num_players, seed, str(deck), link(gs)))`
greedy_solver: remove global variables. allow exporting games with no actions 2023-03-17 11:55:46 +01:00			`return False`
			`return True`

add GameState class to model progress of hanabi game 2023-03-14 09:04:08 +01:00
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00			`def run_samples(num_players, sample_size):`
greedy_solver: remove global variables. allow exporting games with no actions 2023-03-17 11:55:46 +01:00			`won = 0`
			`lost = 0`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`cur = conn.cursor()`
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00			`cur.execute("SELECT seed, num_players, deck FROM seeds WHERE variant_id = 0 AND num_players = (%s) order by seed desc limit (%s)", (num_players, sample_size))`
first version of greedy solver: winrate roughly 80% 2023-03-14 15:20:18 +01:00			`for r in cur:`
greedy_solver: remove global variables. allow exporting games with no actions 2023-03-17 11:55:46 +01:00			`succ = run_deck(*r)`
			`if succ:`
			`won += 1`
			`else:`
			`lost += 1`
			`print("won: {:4}, lost: {:4}".format(won, lost), end = "\r")`
implement handling of dupes in same players hands 2023-03-14 18:15:15 +01:00			`print()`
greedy_solver: remove global variables. allow exporting games with no actions 2023-03-17 11:55:46 +01:00			`print("Total wins: {}%".format(round(100 * won / (lost + won), 2)))`
better testing for greedy solver. implement better handling of copies of cards 2023-03-16 14:07:42 +01:00
			`if __name__ == "__main__":`
			`for p in range(2, 6):`
			`print("Testing on {} players...".format(p))`
			`run_samples(p, sys.argv[1])`