# hanabi-league/src/stats.py

import enum
from typing import List, Tuple, Set

from hanabi import hanab_game

import utils
from database import conn_manager
import games_db_interface
from log_setup import logger


class GameOutcome(enum.Enum):
    win = 0
    discard_crit = 1
    bomb_crit = 2
    strikeout = 3
    bottom_deck = 4
    vote_to_kill = 5
    out_of_pace = 6


class GameAnalysisResult:
    def __init__(self,
                 outcomes: Set[GameOutcome],
                 bdrs: List[Tuple[hanab_game.DeckCard, int]],
                 lost_crits: List[hanab_game.DeckCard]
                 ):
        self.outcomes = outcomes
        self.bdrs = bdrs
        self.lost_crits = lost_crits


def analyze_replay(instance: hanab_game.HanabiInstance, actions: List[hanab_game.Action]) -> GameAnalysisResult:
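    """
    Replay the given actions on a fresh game state of the given instance and classify
    the result: which outcomes apply, which bottom-deck risks were taken and which
    critical cards were lost along the way.
    """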
    # List of bottom-deck risks taken, as pairs (card, draw pile size at the time of the risk)
    bdrs = []
    # Starts empty; an outcome is added for each reason we find why the game was lost (or won)
    outcomes = set()
    lost_crits = []

    game = hanab_game.GameState(instance)

    def handle_lost_card(card, game, play: bool):
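        """
        Record the consequences of losing this card to a discard (play=False) or a
        bombed play (play=True): losing a critical card sets the corresponding
        outcome, while losing a non-trash card whose other copies are all still
        undrawn is counted as a bottom-deck risk at the current draw pile size.
        """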
        if not game.is_trash(card):
            if game.is_critical(card):
                outcomes.add(GameOutcome.bomb_crit if play else GameOutcome.discard_crit)
                lost_crits.append(card)
            # Ranks 2-4 have two copies: losing one is risky if the other copy is still undrawn
            elif card.rank != 1:
                if card in game.deck[game.progress:]:
                    bdrs.append((card, game.draw_pile_size))
            # Ones have three copies: losing one is risky only if both other copies are still undrawn
            else:
                if game.deck[game.progress:].count(card) == 2:
                    bdrs.append((card, game.draw_pile_size))

    for action in actions:
        if action.type == hanab_game.ActionType.Discard:
            discarded_card = instance.deck[action.target]
            handle_lost_card(discarded_card, game, False)
        elif action.type == hanab_game.ActionType.Play:
            played_card = instance.deck[action.target]
            if not game.is_playable(played_card) and not game.is_trash(played_card):
                handle_lost_card(played_card, game, True)
        game.make_action(action)

    if game.pace < 0:
        outcomes.add(GameOutcome.out_of_pace)

    if game.strikes == 3:
        outcomes.add(GameOutcome.strikeout)
    elif actions[-1].type in [hanab_game.ActionType.EndGame, hanab_game.ActionType.VoteTerminate]:
        outcomes.add(GameOutcome.vote_to_kill)

    if game.score == 5 * instance.num_suits:
        outcomes.add(GameOutcome.win)

    return GameAnalysisResult(outcomes, bdrs, lost_crits)


def analyze_game_and_store_stats(game_id: int):
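    """
    Load the stored replay of the given game, analyze it and upsert the per-game
    statistics (number of bottom-deck risks and lost crits) into game_statistics.
    """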
    instance, actions = games_db_interface.load_game(game_id)
    analysis = analyze_replay(instance, actions)
    cur = conn_manager.get_new_cursor()
    cur.execute(
        "INSERT INTO game_statistics (game_id, num_bottom_deck_risks, num_crits_lost) "
        "VALUES (%s, %s, %s) "
        "ON CONFLICT (game_id) DO UPDATE "
        "SET (num_bottom_deck_risks, num_crits_lost) = (EXCLUDED.num_bottom_deck_risks, EXCLUDED.num_crits_lost)",
        (game_id, len(analysis.bdrs), len(analysis.lost_crits))
    )
    conn_manager.get_connection().commit()


def analyze_all_games():
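    """
    Analyze every game that has no entry in game_statistics yet and store its stats.
    """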
    cur = conn_manager.get_new_cursor()
    cur.execute(
        "SELECT id FROM games "
        "LEFT OUTER JOIN game_statistics "
        "  ON games.id = game_statistics.game_id "
        "WHERE game_statistics.game_id IS NULL "
        "ORDER BY games.id"
    )
    for (game_id,) in cur.fetchall():
        analyze_game_and_store_stats(game_id)


def update_user_statistics():
    """
    Recompute the cumulative statistics for all users, assuming that the corresponding
    per-game statistics have already been computed.
    """
    # Note that some of these statistics could be maintained by updating them on each new game insertion.
    # However, it would be tedious to ensure that *every* new game triggers an update of these statistics.
    # It would also be error-prone, since making a mistake once means that values will be off forever
    # (unless the DB is reset).
    # Since it is cheap to accumulate these values over the whole DB, we recreate the statistics as a whole,
    # reusing only the individual per-game results (which never change and therefore can only be missing,
    # but never wrong).
    cur = conn_manager.get_new_cursor()

    # Reset all statistics to zero, creating a row for each user and variant type if missing
    for clue_starved in [True, False]:
        rating_type = utils.get_rating_type(clue_starved)
        # We insert 0 here to ensure that we have an entry for each player.
        # Note that this will immediately be changed by the next query in case it is nonzero,
        # so the zero value never shows up in the database if it was nonzero before.
        cur.execute(
            "INSERT INTO user_statistics"
            " (user_id, variant_type, total_game_moves, games_played, games_won, current_streak, maximum_streak, total_bdr, total_crits_lots)"
            " ("
            "   SELECT id, %s, 0, 0, 0, 0, 0, 0, 0 FROM users"
            " ) "
            "ON CONFLICT (user_id, variant_type) DO UPDATE "
            "SET"
            "  (total_game_moves, games_played, games_won, current_streak, maximum_streak, total_bdr, total_crits_lots)"
            "  ="
            "  (EXCLUDED.total_game_moves, EXCLUDED.games_played, EXCLUDED.games_won, EXCLUDED.current_streak, EXCLUDED.maximum_streak, EXCLUDED.total_bdr, EXCLUDED.total_crits_lots)",
            (rating_type,)
        )

    # Most of the statistics are very easy to compute: we just have to accumulate data from other tables
    cur.execute(
        "INSERT INTO user_statistics (user_id, variant_type, total_game_moves, games_played, games_won, total_bdr, total_crits_lots)"
        " ("
        " SELECT"
        "   users.id,"
        "   CASE WHEN clue_starved THEN %s ELSE %s END,"
        "   SUM(games.num_turns),"
        "   COUNT(*),"  # Counts the number of rows (per user id), i.e. the number of games played
        "   COUNT(*) FILTER (WHERE variants.num_suits * 5 = games.score),"  # Same, but only counting wins
        "   SUM(game_statistics.num_bottom_deck_risks),"  # Simple accumulation of the per-game stats
        "   SUM(game_statistics.num_crits_lost)"
        " FROM users"
        "   INNER JOIN game_participants"
        "     ON game_participants.user_id = users.id"
        "   INNER JOIN games"
        "     ON game_participants.game_id = games.id"
        "   INNER JOIN variants"
        "     ON variants.id = games.variant_id"
        "   LEFT OUTER JOIN game_statistics"
        "     ON games.id = game_statistics.game_id"
        " GROUP BY users.id, clue_starved"
        " ) "
        "ON CONFLICT (user_id, variant_type) DO UPDATE "
        "SET"
        "  (total_game_moves, games_played, games_won, total_bdr, total_crits_lots)"
        "  ="
        "  (EXCLUDED.total_game_moves, EXCLUDED.games_played, EXCLUDED.games_won, EXCLUDED.total_bdr, EXCLUDED.total_crits_lots)",
        (utils.get_rating_type(True), utils.get_rating_type(False))
    )

    # This computes the maximum streak lengths; it's quite complicated.
    # First (in the innermost select clause), we build up an auxiliary table, which consists of some joined data
    # that we are interested in, but most importantly each row gets a 'group_id' entry in such a way that games
    # belonging to the same streak will have the same group_id:
    # In ascending league_id order, this entry counts the number of *losses* up until this point: therefore, the
    # number does not increase during win streaks, but increases for each loss.
    # Additionally, we subtract 1 from this sum for lost games, so that losses always have the same group id as
    # the last win immediately before them. Therefore, each group (= entries with the same group id) now consists
    # of some consecutive wins, optionally followed by a loss.
    # In the second query, we use these group ids to add a 'streak_length' to each row by numbering the rows
    # inside their corresponding group (this is what the OVER (PARTITION BY ..., group_id) does).
    # In a third select statement, it is then easy to calculate the maximum streak by taking the maximum of this
    # row ranging over all games, grouped by user id and rating type (Clue Starved/Non-CS currently).
    # Finally, we just wrap the computed data into an insert statement to directly store it in the statistics table.
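    # Worked example (hypothetical data): for one user and one rating type, games in ascending
    # league_id order with results W W L W W W L W get labeled as follows:
    #   losses so far (inclusive):    0 0 1 1 1 1 2 2
    #   group_id (losses - 1 on L):   0 0 0 1 1 1 1 2
    #   streak_length (wins only):    1 2 - 1 2 3 - 1
    # so MAX(streak_length) = 3 is recorded as this user's maximum streak.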
    cur.execute(
        "INSERT INTO user_statistics (user_id, variant_type, maximum_streak)"
        " ("
        " SELECT"
        "   user_id,"
        "   CASE WHEN clue_starved THEN %s ELSE %s END,"
        "   MAX(streak_length) AS max_streak_length FROM"
        "   ("
        "   SELECT"
        "     *,"
        # Note that here we have the extra distinction to only add a streak_length to wins, not losses.
        # Otherwise, a streak of n games would result in a loss that has 'streak' n + 1, which is not what we want.
        "     CASE"
        "       WHEN num_suits * 5 = score"
        "       THEN"
        "         COUNT(*)"
        "         OVER (PARTITION BY user_id, clue_starved, group_id ORDER BY league_id)"
        "     END"
        "     AS streak_length"
        "   FROM"
        "     ("
        "     SELECT"
        "       users.id AS user_id,"
        "       variants.clue_starved,"
        "       variants.num_suits,"
        "       games.score,"
        "       games.league_id,"
        # This count function is the tricky part that labels each game with the group_id of the streak it belongs to
        "       COUNT(*)"
        "         FILTER (WHERE variants.num_suits * 5 != games.score)"
        "         OVER (PARTITION BY users.id, variants.clue_starved ORDER BY games.league_id)"
        "       - CASE WHEN variants.num_suits * 5 != games.score THEN 1 ELSE 0 END"
        "       AS group_id"
        "     FROM users"
        "       INNER JOIN game_participants"
        "         ON game_participants.user_id = users.id"
        "       INNER JOIN games"
        "         ON game_participants.game_id = games.id"
        "       INNER JOIN variants"
        "         ON variants.id = games.variant_id"
        "     ) AS games_grouped_by_streak"
        "   ) AS games_with_streaks"
        " GROUP BY user_id, clue_starved"
        " ) "
        "ON CONFLICT (user_id, variant_type) DO UPDATE "
        "SET maximum_streak = EXCLUDED.maximum_streak",
        (utils.get_rating_type(True), utils.get_rating_type(False))
    )

    conn_manager.get_connection().commit()