Add comments to DB queries for statistics

This commit is contained in:
Maximilian Keßler 2023-11-24 12:33:57 +01:00
parent 18607699c9
commit 058d87f5bf
Signed by: max
GPG key ID: BCC5A619923C0BA5

View file

@ -134,6 +134,8 @@ def update_user_statistics():
" (EXCLUDED.total_game_moves, EXCLUDED.games_played, EXCLUDED.games_won, EXCLUDED.current_streak, EXCLUDED.total_bdr, EXCLUDED.total_crits_lots)", " (EXCLUDED.total_game_moves, EXCLUDED.games_played, EXCLUDED.games_won, EXCLUDED.current_streak, EXCLUDED.total_bdr, EXCLUDED.total_crits_lots)",
(rating_type,) (rating_type,)
) )
# Most of the statistics are very easy to compute: We just have to accumulate data from other tables
cur.execute( cur.execute(
"INSERT INTO user_statistics (user_id, variant_type, total_game_moves, games_played, games_won, total_bdr, total_crits_lots)" "INSERT INTO user_statistics (user_id, variant_type, total_game_moves, games_played, games_won, total_bdr, total_crits_lots)"
" (" " ("
@ -141,9 +143,9 @@ def update_user_statistics():
" users.id," " users.id,"
" CASE WHEN clue_starved THEN %s ELSE %s END," " CASE WHEN clue_starved THEN %s ELSE %s END,"
" SUM(games.num_turns)," " SUM(games.num_turns),"
" COUNT(*)," " COUNT(*)," # This counts the number of rows (per user id), so the number of played game
" COUNT(*) FILTER ( WHERE variants.num_suits * 5 = games.score )," " COUNT(*) FILTER ( WHERE variants.num_suits * 5 = games.score )," # Same, but only count wins now
" SUM (game_statistics.num_bottom_deck_risks)," " SUM (game_statistics.num_bottom_deck_risks)," # Simple accumulation of the game stats
" SUM (game_statistics.num_crits_lost)" " SUM (game_statistics.num_crits_lost)"
"FROM users" "FROM users"
" INNER JOIN game_participants " " INNER JOIN game_participants "
@ -163,6 +165,20 @@ def update_user_statistics():
" (EXCLUDED.total_game_moves, EXCLUDED.games_played, EXCLUDED.games_won, EXCLUDED.total_bdr, EXCLUDED.total_crits_lots)", " (EXCLUDED.total_game_moves, EXCLUDED.games_played, EXCLUDED.games_won, EXCLUDED.total_bdr, EXCLUDED.total_crits_lots)",
(utils.get_rating_type(True), utils.get_rating_type(False)) (utils.get_rating_type(True), utils.get_rating_type(False))
) )
# This computes the maximum streak lengths, it's quite complicated.
# First (in the innermost select clause), we build up an auxiliary table, which consists of some joined data that
# we are interested in, but most importantly each row gets a 'group_id' entry in such a way that games belonging
# to the same streak will have the same group_id:
# In ascending league_id order, this entry counts the number of *losses* up until this point: Therefore, the number
# does not increase during win streaks, but increases for each loss.
# Additionally, we subtract 1 from this sum for lost games, so that losses always have the same group id as the last
# win immediately before them. Therefore, each group (= entries with the same group id) now consists of
# some consecutive wins, optionally followed by a loss
# In the second query, we can now use these group ids to add a 'streak_length' to each row by numbering the rows
# inside their corresponding group (this is what the OVER (PARTITION BY ..., group_id) does).
# In a third select statement, it is then easy to calculate the maximum streak by taking the maximum of this
# column over all games, where we grouped by user id and rating type (Clue Starved/Non-CS currently)
# Finally, we just wrap the computed data into an insert statement to directly store it in the statistics table
cur.execute( cur.execute(
"INSERT INTO user_statistics (user_id, variant_type, current_streak)" "INSERT INTO user_statistics (user_id, variant_type, current_streak)"
" (" " ("
@ -173,6 +189,8 @@ def update_user_statistics():
" (" " ("
" SELECT" " SELECT"
" *," " *,"
# Note that we deliberately assign a streak_length only to wins, not to losses.
# Otherwise, the loss ending a streak of n wins would get 'streak' n + 1, which is not what we want.
" CASE" " CASE"
" WHEN num_suits * 5 = score" " WHEN num_suits * 5 = score"
" THEN" " THEN"
@ -188,6 +206,7 @@ def update_user_statistics():
" variants.num_suits," " variants.num_suits,"
" games.score," " games.score,"
" games.league_id," " games.league_id,"
# This count function is the tricky part that labels each game with the group_id of the streak it belongs to
" COUNT(*) " " COUNT(*) "
" FILTER (WHERE variants.num_suits * 5 != games.score)" " FILTER (WHERE variants.num_suits * 5 != games.score)"
" OVER (PARTITION BY users.id, variants.clue_starved ORDER BY games.league_id)" " OVER (PARTITION BY users.id, variants.clue_starved ORDER BY games.league_id)"