# Py-Hanabi/src/hanabi/live/download_data.py
#
# 370 lines
# 14 KiB
# Python

import alive_progress
from typing import Dict, Optional, List
import psycopg2.errors
import psycopg2.extras
import platformdirs
import unidecode
from hanabi import hanab_game
from hanabi import constants
from hanabi import logger
from hanabi import database
from hanabi.live import site_api
from hanabi.live import compress
from hanabi.live import variants
from hanabi.live import hanab_live
class GameExportError(ValueError):
    """Base error for failures that occur while exporting a game."""

    def __init__(self, game_id, msg):
        # Every message is prefixed with the id of the game that was being exported.
        message = "When exporting game {}: {}".format(game_id, msg)
        super().__init__(message)
class GameExportNoResponseFromSiteError(GameExportError):
    """Export error used when the site did not deliver any response for the game."""

    def __init__(self, game_id):
        reason = "No response from site"
        super().__init__(game_id, reason)
class GameExportInvalidResponseTypeError(GameExportError):
    """Export error used when the response for the game has an unexpected type."""

    def __init__(self, game_id, response_type):
        # The game id is already added to the message by the base class, so only the
        # offending response type is formatted into the message here.
        super().__init__(
            game_id,
            "Invalid response type (expected json, got {})".format(response_type)
        )
class GameExportInvalidFormatError(GameExportError):
    """Export error used when the response for the game does not have the expected format."""

    def __init__(self, game_id, msg):
        details = "Invalid response format: {}".format(msg)
        super().__init__(game_id, details)
class GameExportInvalidNumberOfPlayersError(GameExportInvalidFormatError):
    """Format error used when the number of received players does not match the expectation."""

    def __init__(self, game_id, expected, received):
        details = "Received invalid list of players: Expected {} many, got {}".format(expected, received)
        super().__init__(game_id, details)
def ensure_users_in_db_and_get_ids(usernames: List[str]):
    """
    Ensures that all given users are present in the users table and returns their ids
    Users that are not present yet are inserted together with their unidecoded username,
    rows of already present users are left as is
    :param usernames: names of the users to insert / look up
    :return: list of the user ids, in the same order as the given usernames
    """
    normalized_usernames = [unidecode.unidecode(username) for username in usernames]
    psycopg2.extras.execute_values(
        database.cur,
        "INSERT INTO users (username, normalized_username)"
        "VALUES %s "
        "ON CONFLICT (username) DO NOTHING ",
        # Materialize the pairs, execute_values is documented to take a sequence of rows
        list(zip(usernames, normalized_usernames))
    )

    # The ids are looked up with one query per user, so the result has the same order
    # (and the same duplicates) as the input list.
    user_ids = []
    for username in usernames:
        database.cur.execute(
            "SELECT id FROM users "
            "WHERE username = %s",
            (username,)
        )
        (user_id, ) = database.cur.fetchone()
        user_ids.append(user_id)
    return user_ids
#
def detailed_export_game(
        game_id: int,
        score: Optional[int] = None,
        var_id: Optional[int] = None,
        seed_exists: bool = False
) -> None:
    """
    Downloads full details of game from hanab.live, inserts seed and game into DB
    If seed is already present, it is left as is
    If game is already present, game details will be updated
    :param game_id: id of game to export
    :param score: If given, this will be inserted as score of the game. If not given, score is calculated
    :param var_id: If given, this will be inserted as variant id of the game. If not given, this is looked up
    :param seed_exists: If specified and true, assumes that the seed is already present in database.
        If this is not the case, call will raise a DB insertion error
    :raises GameExportError and its child classes
    :raises NotImplementedError: if the score has to be calculated for a game with detrimental characters
    """
    logger.debug("Importing game {}".format(game_id))

    # --- Download and validate the export ---
    game_json = site_api.get("export/{}".format(game_id))
    if game_json is None:
        raise GameExportNoResponseFromSiteError(game_id)
    if not isinstance(game_json, dict):
        raise GameExportInvalidResponseTypeError(game_id, type(game_json))
    if game_json.get('id', None) != game_id:
        raise GameExportInvalidFormatError(game_id, "Unexpected game_id {} received, expected {}".format(
            game_json.get('id'), game_id
        ))

    players = game_json.get('players', [])
    num_players = len(players)
    if num_players < 2:
        raise GameExportInvalidNumberOfPlayersError(game_id, "≥2", num_players)

    seed = game_json.get('seed', None)
    if not isinstance(seed, str):
        raise GameExportInvalidFormatError(game_id, "Unexpected seed, expected string, got {}".format(seed))

    # --- Read the game options, falling back to the defaults for absent entries ---
    options = game_json.get('options', {})
    # Compare against None explicitly, so that an explicitly given variant id of 0 is respected
    if var_id is None:
        var_id = variants.variant_id(options.get('variant', 'No Variant'))
    timed = options.get('timed', False)
    time_base = options.get('timeBase', 0)
    time_per_turn = options.get('timePerTurn', 0)
    speedrun = options.get('speedrun', False)
    card_cycle = options.get('cardCycle', False)
    deck_plays = options.get('deckPlays', False)
    empty_clues = options.get('emptyClues', False)
    one_extra_card = options.get('oneExtraCard', False)
    one_less_card = options.get('oneLessCard', False)
    all_or_nothing = options.get('allOrNothing', False)
    detrimental_characters = options.get('detrimentalCharacters', False)
    starting_player = options.get('startingPlayer', 0)

    # --- Parse actions and deck ---
    try:
        actions = [hanab_game.Action.from_json(action) for action in game_json.get('actions', [])]
    except hanab_game.ParseError as e:
        raise GameExportInvalidFormatError(game_id, "Failed to parse actions") from e

    # A missing deck is reported as an export error instead of failing with a TypeError on iteration
    deck_json = game_json.get('deck', None)
    if not isinstance(deck_json, list):
        raise GameExportInvalidFormatError(game_id, "Missing or invalid deck")
    try:
        deck = [hanab_game.DeckCard.from_json(card) for card in deck_json]
    except hanab_game.ParseError as e:
        raise GameExportInvalidFormatError(game_id, "Failed to parse deck") from e

    if score is None:
        # need to play through the game once to find out its score
        if detrimental_characters:
            raise NotImplementedError(
                "detrimental characters not supported, cannot determine score of game {}".format(game_id)
            )
        game = hanab_live.HanabLiveGameState(
            hanab_live.HanabLiveInstance(
                deck, num_players, var_id,
                deck_plays=deck_plays,
                one_less_card=one_less_card,
                one_extra_card=one_extra_card,
                all_or_nothing=all_or_nothing,
                starting_player=starting_player
            )
        )
        for action in actions:
            game.make_action(action)
        score = game.score

    # --- Insert seed and deck ---
    if not seed_exists:
        database.cur.execute(
            "INSERT INTO seeds (seed, num_players, starting_player, variant_id)"
            "VALUES (%s, %s, %s, %s)"
            "ON CONFLICT (seed) DO NOTHING",
            (seed, num_players, starting_player, var_id)
        )
        logger.debug("New seed {} imported.".format(seed))

        deck_values = [
            (seed, index, card.suitIndex, card.rank)
            for index, card in enumerate(deck)
        ]
        psycopg2.extras.execute_values(
            database.cur,
            "INSERT INTO decks (seed, deck_index, suit_index, rank)"
            "VALUES %s "
            "ON CONFLICT (seed, deck_index) DO UPDATE SET "
            "(suit_index, rank) = (excluded.suit_index, excluded.rank)",
            deck_values
        )

    # --- Insert the game itself, refreshing the option columns if it is already present ---
    # one_less_card is part of the refreshed columns like all other game options.
    database.cur.execute(
        "INSERT INTO games ("
        "id, num_players, starting_player, variant_id, timed, time_base, time_per_turn, speedrun, card_cycle, "
        "deck_plays, empty_clues, one_extra_card, one_less_card,"
        "all_or_nothing, detrimental_characters, seed, score"
        ")"
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        "ON CONFLICT (id) DO UPDATE SET ("
        "timed, time_base, time_per_turn, speedrun, card_cycle, deck_plays, empty_clues, one_extra_card,"
        "one_less_card, all_or_nothing, detrimental_characters"
        ") = ("
        "EXCLUDED.timed, EXCLUDED.time_base, EXCLUDED.time_per_turn, EXCLUDED.speedrun, EXCLUDED.card_cycle, "
        "EXCLUDED.deck_plays, EXCLUDED.empty_clues, EXCLUDED.one_extra_card,"
        "EXCLUDED.one_less_card, EXCLUDED.all_or_nothing, EXCLUDED.detrimental_characters"
        ")",
        (
            game_id, num_players, starting_player, var_id, timed, time_base, time_per_turn, speedrun, card_cycle,
            deck_plays, empty_clues, one_extra_card, one_less_card,
            all_or_nothing, detrimental_characters, seed, score
        )
    )

    # --- Insert participants into database, the seat is the position in the players list ---
    ids = ensure_users_in_db_and_get_ids(players)
    game_participant_values = [(game_id, user_id, seat) for seat, user_id in enumerate(ids)]
    psycopg2.extras.execute_values(
        database.cur,
        "INSERT INTO game_participants (game_id, user_id, seat) VALUES %s "
        "ON CONFLICT (game_id, user_id) DO UPDATE SET seat = excluded.seat",
        game_participant_values
    )

    # --- Insert actions into database, the turn is the position in the actions list ---
    # NOTE(review): this insert has no ON CONFLICT clause; presumably re-exporting a game whose
    # actions are already stored fails on a uniqueness constraint - confirm against the schema.
    action_values = [
        # A missing (falsy) action value is stored as 0
        (game_id, turn, action.type.value, action.target, action.value or 0)
        for turn, action in enumerate(actions)
    ]
    psycopg2.extras.execute_values(
        database.cur,
        "INSERT INTO game_actions (game_id, turn, type, target, value) "
        "VALUES %s",
        action_values
    )
    logger.debug("Imported game {}".format(game_id))
def _process_game_row(game: Dict, var_id, export_all_games: bool = False):
    """
    Imports a single game row, as received from the variant game listing, into the database
    Rows whose number of listed users does not match the reported number of players are not imported,
    but logged and appended to the file 'invalid_game_ids.txt' in the user data directory of the application
    :param game: row to import, read keys are 'id', 'seed', 'num_players', 'users' and 'score',
        where 'users' is a string of user names separated by ', '
    :param var_id: variant id that is stored for the game
    :param export_all_games: If true, the full game details are downloaded and exported.
        Otherwise, only the data contained in the row is inserted, and the full details are only
        downloaded if this insertion violates a foreign key constraint
    :raises ValueError: if one of id, seed, number of players or score is missing in the row
    """
    game_id = game.get('id', None)
    seed = game.get('seed', None)
    num_players = game.get('num_players', None)
    users = game.get('users', "").split(", ")
    score = game.get('score', None)

    if any(v is None for v in [game_id, seed, num_players, score]):
        raise ValueError("Unknown response format on hanab.live")

    if len(users) != num_players:
        logger.error("Invalid number of players reported when processing row {}".format(game))
        f = platformdirs.user_data_dir(constants.APP_NAME, ensure_exists=True) + '/invalid_game_ids.txt'
        # User names are not restricted to ASCII, so the encoding is fixed instead of relying
        # on the platform default.
        with open(f, "a+", encoding="utf-8") as invalid_games_file:
            invalid_games_file.write(
                "{}, {}, {}, {}\n".format(game_id, var_id, num_players, ", ".join(users))
            )
        return

    if export_all_games:
        detailed_export_game(game_id, score=score, var_id=var_id)
        logger.debug("Imported game {}".format(game_id))
        return

    # The savepoint allows to undo only the failed insertion below, without discarding the
    # rest of the current transaction.
    database.cur.execute("SAVEPOINT seed_insert")
    try:
        database.cur.execute(
            "INSERT INTO games (id, seed, num_players, score, variant_id)"
            "VALUES"
            "(%s, %s ,%s ,%s ,%s)"
            "ON CONFLICT (id) DO NOTHING",
            (game_id, seed, num_players, score, var_id)
        )
    except psycopg2.errors.ForeignKeyViolation:
        # Sometimes, seed is not present in the database yet, then we will have to query the full game details
        # (including the seed) to export it accordingly
        database.cur.execute("ROLLBACK TO seed_insert")
        detailed_export_game(game_id, score=score, var_id=var_id)
    database.cur.execute("RELEASE seed_insert")
    logger.debug("Imported game {}".format(game_id))
def download_all_games_not_in_db(download_known_but_not_exported=True):
    """
    Tries to export every game whose id is smaller than the largest known game id
    and that is not regarded as present in the database
    Ids for which the site does not deliver a response are skipped
    :param download_known_but_not_exported: If true, only games whose 'actions' entry is not null are
        regarded as present, so that all other games in the games table are downloaded as well.
        If false, every game in the games table is regarded as present.
    :raises GameExportError: child classes other than GameExportNoResponseFromSiteError are not handled
    """
    # Build the query step by step: a conditional expression binds weaker than '+', so it
    # can not simply be placed in the middle of a chain of string concatenations.
    query = "SELECT id FROM games "
    if download_known_but_not_exported:
        # NOTE(review): assumes the games table has an 'actions' column - confirm against the schema
        query += "WHERE actions is not null "
    query += "ORDER BY id"
    database.cur.execute(query)

    game_ids = [game_id for (game_id,) in database.cur.fetchall()]
    if not game_ids:
        # Without any known game there is no largest id that bounds the search
        logger.info("No games present in database, nothing to download")
        return

    # The ids are sorted ascending, so the last one is the largest
    largest_game_id = game_ids[-1]
    known_game_ids = set(game_ids)

    with alive_progress.alive_bar(
            total=largest_game_id - len(game_ids),
            title='Downloading all games not in database'
    ) as bar:
        for game_id in range(1, largest_game_id):
            if game_id in known_game_ids:
                continue
            try:
                detailed_export_game(game_id)
                logger.info("Found new game {} that was not in DB before".format(game_id))
                bar()
            except GameExportNoResponseFromSiteError:
                # No game is available under this id, it still counts as processed
                bar()
                continue
def download_games(var_id, export_all_games: bool = False):
    """
    Downloads the games of the given variant that are not present in the database yet
    The game listing of the variant is fetched page by page, starting with the page that contains
    the first game not downloaded yet. After each page, the id of its last game is stored in
    variant_game_downloads and the transaction is committed.
    :param var_id: id of the variant to download games for
    :param export_all_games: passed on to the import of each single game row
    :raises ValueError: if var_id is not a known variant or the response has an unknown format
    :raises RuntimeError: if a request to the site did not deliver a response
    """
    name = variants.variant_name(var_id)
    page_size = 100
    if name is None:
        raise ValueError("{} is not a known variant_id.".format(var_id))

    url = "variants/{}".format(var_id)
    r = site_api.api(url, refresh=True)
    if not r:
        raise RuntimeError("Failed to download request from hanab.live")

    num_entries = r.get('total_rows', None)
    if num_entries is None:
        raise ValueError("Unknown response format on hanab.live")

    database.cur.execute(
        "SELECT COUNT(*) FROM games WHERE variant_id = %s AND id <= "
        "(SELECT COALESCE (last_game_id, 0) FROM variant_game_downloads WHERE variant_id = %s)",
        (var_id, var_id)
    )
    num_already_downloaded_games = database.cur.fetchone()[0]
    assert num_already_downloaded_games <= num_entries, "Database inconsistent, too many games present."

    next_page = num_already_downloaded_games // page_size
    last_page = (num_entries - 1) // page_size

    if num_already_downloaded_games == num_entries:
        logger.info("Already downloaded all games ({:6} many) for variant {:4} [{}]".format(num_entries, var_id, name))
        return

    with alive_progress.alive_bar(
            total=num_entries - num_already_downloaded_games,
            title='Downloading remaining games for variant id {:4} [{}]'.format(var_id, name),
            enrich_print=False
    ) as bar:
        for page in range(next_page, last_page + 1):
            # Each page is first requested without forcing a refresh (except for the last page),
            # and only requested again with a forced refresh if its row count is unexpected
            for refresh in [False, True]:
                r = site_api.api(url + "?col[0]=0&page={}".format(page), refresh=(page == last_page) or refresh)
                if not r:
                    raise RuntimeError("Failed to download request from hanab.live")
                page_rows = r.get('rows', [])

                # The row count is checked on the complete page, before the already downloaded
                # rows are dropped, since a partially downloaded page never has page_size rows left
                if not (page == last_page or len(page_rows) == page_size):
                    if not refresh:
                        # row count does not match, maybe this is due to an old cached version of the api query,
                        # try again with a forced refresh of the query
                        logger.verbose("refreshing page {} due to unexpected row count".format(page))
                        continue
                    # If refreshing did not fix the error, log a warning
                    logger.warn('WARN: received unexpected row count ({}, expected {}) on page {}'.format(
                        len(page_rows), page_size, page)
                    )

                rows = page_rows
                if page == next_page:
                    # Skip the rows at the start of the first page that have been downloaded already
                    rows = page_rows[num_already_downloaded_games % page_size:]

                for row in rows:
                    _process_game_row(row, var_id, export_all_games)
                    bar()

                if page_rows:
                    # Remember up to which game the variant has been downloaded
                    database.cur.execute(
                        "INSERT INTO variant_game_downloads (variant_id, last_game_id) VALUES"
                        "(%s, %s)"
                        "ON CONFLICT (variant_id) DO UPDATE SET last_game_id = EXCLUDED.last_game_id",
                        (var_id, page_rows[-1]['id'])
                    )
                database.conn.commit()
                # we need this so that we don't execute the iteration with forced refresh
                # if stuff already checked out without refreshing
                break