From 058d87f5bf0c973314c588c3cda5774f028f9f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20Ke=C3=9Fler?= Date: Fri, 24 Nov 2023 12:33:57 +0100 Subject: [PATCH] Add comments to DB queries for statistics --- src/stats.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/stats.py b/src/stats.py index 6f487f5..56206c5 100644 --- a/src/stats.py +++ b/src/stats.py @@ -134,6 +134,8 @@ def update_user_statistics(): " (EXCLUDED.total_game_moves, EXCLUDED.games_played, EXCLUDED.games_won, EXCLUDED.current_streak, EXCLUDED.total_bdr, EXCLUDED.total_crits_lots)", (rating_type,) ) + + # Most of the statistics are very easy to compute: we just have to accumulate data from other tables cur.execute( "INSERT INTO user_statistics (user_id, variant_type, total_game_moves, games_played, games_won, total_bdr, total_crits_lots)" " (" @@ -141,9 +143,9 @@ def update_user_statistics(): " users.id," " CASE WHEN clue_starved THEN %s ELSE %s END," " SUM(games.num_turns)," - " COUNT(*)," - " COUNT(*) FILTER ( WHERE variants.num_suits * 5 = games.score )," - " SUM (game_statistics.num_bottom_deck_risks)," + " COUNT(*)," # This counts the number of rows (per user id), so the number of games played + " COUNT(*) FILTER ( WHERE variants.num_suits * 5 = games.score )," # Same, but only count wins now + " SUM (game_statistics.num_bottom_deck_risks)," # Simple accumulation of the game stats " SUM (game_statistics.num_crits_lost)" "FROM users" " INNER JOIN game_participants " @@ -163,6 +165,20 @@ def update_user_statistics(): " (EXCLUDED.total_game_moves, EXCLUDED.games_played, EXCLUDED.games_won, EXCLUDED.total_bdr, EXCLUDED.total_crits_lots)", (utils.get_rating_type(True), utils.get_rating_type(False)) ) + # This computes the maximum streak lengths, which is quite complicated. 
+ # First (in the innermost select clause), we build up an auxiliary table, which consists of some joined data that + # we are interested in, but most importantly each row gets a 'group_id' entry in such a way that games belonging + # to the same streak will have the same group_id: + # In ascending league_id order, this entry counts the number of *losses* up until this point: Therefore, the number + # does not increase during win streaks, but increases for each loss. + # Additionally, we subtract 1 from this sum for lost games, so that losses always have the same group id as the last + # win immediately before them. Therefore, each group (= entries with the same group id) now consists of + # some consecutive wins, optionally followed by a loss. + # In the second query, we can now use these group ids to add a 'streak_length' to each row by numbering the rows + # inside their corresponding group (this is what the OVER (PARTITION BY ..., group_id) does). + # Now, in a third select statement, it is easy to calculate the maximum streak by taking the maximum of this + # column ranging over all games, where we grouped by user id and rating type (Clue Starved/Non-CS currently). + # Finally, we just wrap the computed data into an insert statement to directly store it in the statistics table. cur.execute( "INSERT INTO user_statistics (user_id, variant_type, current_streak)" " (" @@ -173,6 +189,8 @@ def update_user_statistics(): " (" " SELECT" " *," + # Note that here we have the extra distinction to only add a streak_length to wins, not losses. + # Otherwise, a streak of n games would result in a loss that has 'streak' n + 1, which is not what we want. 
" CASE" " WHEN num_suits * 5 = score" " THEN" @@ -188,6 +206,7 @@ def update_user_statistics(): " variants.num_suits," " games.score," " games.league_id," + # This count function is the tricky part that labels each game with the group_id of the streak it belongs to " COUNT(*) " " FILTER (WHERE variants.num_suits * 5 != games.score)" " OVER (PARTITION BY users.id, variants.clue_starved ORDER BY games.league_id)"