rework downloading of variants: store progress of download in database to resume downloads

This commit is contained in:
Maximilian Keßler 2023-05-13 23:07:10 +02:00
parent 0ffb323b7f
commit 5eac42e5c0
Signed by: max
GPG key ID: BCC5A619923C0BA5
2 changed files with 155 additions and 89 deletions

View file

@ -131,12 +131,17 @@ CREATE INDEX variants_name_idx ON variants (name);
*/ */
DROP TABLE IF EXISTS variant_suits CASCADE; DROP TABLE IF EXISTS variant_suits CASCADE;
CREATE TABLE variant_suits ( CREATE TABLE variant_suits (
variant_id INT NOT NULL, variant_id INT NOT NULL REFERENCES variants ON DELETE CASCADE,
suit_id INT NOT NULL, suit_id INT NOT NULL REFERENCES suits ON DELETE RESTRICT,
index SMALLINT NOT NULL, index SMALLINT NOT NULL,
FOREIGN KEY (variant_id) REFERENCES variants (id) ON DELETE CASCADE,
FOREIGN KEY (suit_id) REFERENCES suits (id) ON DELETE CASCADE,
UNIQUE (variant_id, suit_id), UNIQUE (variant_id, suit_id),
UNIQUE (variant_id, index) UNIQUE (variant_id, index)
); );
CREATE INDEX variant_suits_index ON variant_suits (variant_id, index); CREATE INDEX variant_suits_index ON variant_suits (variant_id, index);
/*
 * Download progress per variant: last_game_id holds the id of the last game row
 * that was processed when downloading the games of that variant, so that a
 * later run can resume instead of starting over.
 */
DROP TABLE IF EXISTS variant_game_downloads CASCADE;
CREATE TABLE variant_game_downloads (
    variant_id   INT PRIMARY KEY REFERENCES variants ON DELETE CASCADE,
    last_game_id INT NOT NULL
);

View file

@ -1,100 +1,161 @@
import json import json
from typing import Dict, Optional
from site_api import get, api, replay from site_api import get, api, replay
from database import Game, store, load, commit, conn from database.database import Game, store, load, commit, conn, cur
from compress import compress_deck, compress_actions, DeckCard, Action from compress import compress_deck, compress_actions, DeckCard, Action, InvalidFormatError
from variants import variant_id from variants import variant_id, variant_name
from hanabi import HanabiInstance, GameState, Action from hanab_live import HanabLiveInstance, HanabLiveGameState
with open('variants.json') as f: with open('variants.json') as f:
variants = json.loads(f.read()) variants = json.loads(f.read())
def download_games(variant_id, name=None):
    """
    Downloads the game list of a variant page by page and stores every entry

    Each row is stripped of its 'users' and 'datetime' entries, wrapped into a Game
    tagged with the variant id and handed to store(). A single commit() is issued
    after all pages have been processed.

    :param variant_id: Id of the variant whose games are requested
    :param name: Only used in the progress output
    """
    url = "variants/{}".format(variant_id)
    overview = api(url)
    if not overview:
        print("Not a valid variant: {}".format(variant_id))
        return

    num_entries = overview['total_rows']
    print("Downloading {} entries for variant {} ({})".format(num_entries, variant_id, name))

    # Ceiling division: pages hold 100 entries each
    num_pages = (num_entries + 99) // 100
    for page_number in range(1, num_pages + 1):
        # '\r' keeps the progress output on a single terminal line
        print("Downloading page {} of {}".format(page_number, num_pages), end='\r')
        page_response = api(url + "?page={}".format(page_number - 1))
        for row in page_response['rows']:
            for unused_key in ('users', 'datetime'):
                row.pop(unused_key)
            game = Game(row)
            game.variant_id = variant_id
            store(game)

    # Terminate the progress line before printing the summary
    print()
    print('Downloaded and stored {} entries for variant {} ({})'.format(num_entries, variant_id, name))
    commit()
# requires seed AND game to already have an entry in database #
# return: (successfully exported game, game without cheat options, null if not exported) def detailed_export_game(game_id: int, score: Optional[int] = None, seed_exists: bool = False) -> None:
def export_game(game_id) -> [bool, bool]: """
with conn.cursor() as cur: Downloads full details of game, inserts seed and game into DB
cur.execute("SELECT deck_plays, one_extra_card, one_less_card, all_or_nothing, actions FROM games WHERE id = (%s)", (game_id,)) If seed is already present, it is left as is
res = cur.fetchall() If game is already present, game details will be updated
if len(res) == 1:
print(res)
return
else:
print('game is completely new')
# return
r = get("export/{}".format(game_id)) :param game_id:
if r is None: :param score: If given, this will be inserted as score of the game. If not given, score is calculated
print("Failed to export game id {}".format(game_id)) :param seed_exists: If specified and true, assumes that the seed is already present in database.
return False, None If this is not the case, call will raise a DB insertion error
assert(r['id'] == game_id) """
# print(r)
try: game_json = get("export/{}".format(game_id))
num_players = len(r['players']) assert game_json.get('id') == game_id, "Invalid response format from hanab.live"
seed = r['seed']
options = r.get('options', {}) players = game_json.get('players', [])
num_players = len(players)
seed = game_json.get('seed', None)
options = game_json.get('options', {})
var_id = variant_id(options.get('variant', 'No Variant')) var_id = variant_id(options.get('variant', 'No Variant'))
deck_plays = options.get('deckPlays', False) deck_plays = options.get('deckPlays', False)
one_extra_card = options.get('oneExtraCard', False) one_extra_card = options.get('oneExtraCard', False)
one_less_card = options.get('oneLessCard', False) one_less_card = options.get('oneLessCard', False)
all_or_nothing = options.get('allOrNothing', False) all_or_nothing = options.get('allOrNothing', False)
actions = [Action.from_json(action) for action in r['actions']] actions = [Action.from_json(action) for action in game_json.get('actions', [])]
deck = [DeckCard.from_json(card) for card in r['deck']] deck = [DeckCard.from_json(card) for card in game_json.get('deck', None)]
except KeyError:
print('Error parsing JSON when exporting game {}'.format(game_id))
raise
assert (players != [])
assert (seed is not None)
if score is None:
# need to play through the game once to find out its score # need to play through the game once to find out its score
game = GameState(HanabiInstance(deck, num_players)) game = HanabLiveGameState(HanabLiveInstance(deck, num_players, var_id))
for action in actions: for action in actions:
game.make_action(action) game.make_action(action)
score = game.score
try: try:
compressed_deck = compress_deck(deck) compressed_deck = compress_deck(deck)
except: except InvalidFormatError:
print("Failed to compress deck while exporting game {}".format(game_id)) print("Failed to compress deck while exporting game {}: {}".format(game_id, deck))
raise raise
try: try:
compressed_actions = compress_actions(actions) compressed_actions = compress_actions(actions)
except: except InvalidFormatError:
print("Failed to compress actions while exporting game {}".format(game_id)) print("Failed to compress actions while exporting game {}".format(game_id))
raise raise
with conn.cursor() as cur: if not seed_exists:
# cur.execute("UPDATE seeds SET deck=(%s) WHERE seed=(%s);", (deck, seed))
cur.execute( cur.execute(
"INSERT INTO seeds (seed, num_players, variant_id, deck)" "INSERT INTO seeds (seed, num_players, variant_id, deck)"
"VALUES (%s, %s, %s, %s)" "VALUES (%s, %s, %s, %s)"
"ON CONFLICT (seed) DO NOTHING", "ON CONFLICT (seed) DO NOTHING",
(seed, num_players, var_id, compressed_deck) (seed, num_players, var_id, compressed_deck)
) )
cur.execute(
"INSERT INTO games (id, num_players, score, seed, variant_id, deck_plays, one_extra_card, one_less_card, all_or_nothing, actions)"
"VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
(game_id, num_players, game.score, seed, var_id, deck_plays, one_extra_card, one_less_card, all_or_nothing, compressed_actions))
conn.commit()
return True, not any([deck_plays, one_extra_card, one_less_card, all_or_nothing])
if __name__ == "__main__": cur.execute(
export_game(961092) "INSERT INTO games ("
"id, num_players, score, seed, variant_id, deck_plays, one_extra_card, one_less_card,"
"all_or_nothing, actions"
")"
"VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
"ON CONFLICT (id) DO UPDATE SET ("
"deck_plays, one_extra_card, one_less_card, all_or_nothing, actions"
") = ("
"EXCLUDED.deck_plays, EXCLUDED.one_extra_card, EXCLUDED.one_less_card, EXCLUDED.all_or_nothing,"
"EXCLUDED.actions"
")",
(
game_id, num_players, score, seed, var_id, deck_plays, one_extra_card, one_less_card,
all_or_nothing, compressed_actions
)
)
def process_game_row(game: Dict, var_id):
    """
    Stores one game row of a variant listing in the database

    If the seed of the game is already present in the seeds table, only the summary
    columns of the game are inserted (an already present game is left untouched).
    Otherwise the game is exported in full via detailed_export_game, passing on the
    score from the listing.

    :param game: Row of the listing, has to provide 'id', 'seed', 'num_players' and 'score'
    :param var_id: Variant id stored along with the game
    :raises ValueError: If one of the required entries is missing or None
    """
    required_keys = ('id', 'seed', 'num_players', 'score')
    game_id, seed, num_players, score = [game.get(key, None) for key in required_keys]
    if not all(value is not None for value in (game_id, seed, num_players, score)):
        raise ValueError("Unknown response format on hanab.live")

    cur.execute("SELECT seed FROM seeds WHERE seed = %s", (seed,))
    seed_row = cur.fetchone()
    if seed_row is None:
        # Unknown seed: the full export is needed
        detailed_export_game(game_id, score)
        return

    cur.execute(
        "INSERT INTO games (id, seed, num_players, score, variant_id)"
        "VALUES"
        "(%s, %s ,%s ,%s ,%s)"
        "ON CONFLICT (id) DO NOTHING",
        (game_id, seed, num_players, score, var_id)
    )
def download_games(var_id):
    """
    Downloads the games of a variant from hanab.live and stores them in the database

    After each processed page, the id of the last game on that page is written to
    variant_game_downloads and the transaction is committed. A later call uses this
    to skip the pages that are already covered by stored games.

    :param var_id: Id of the variant whose games are downloaded
    :raises ValueError: If var_id is not a known variant or the response has no 'total_rows'
    :raises RuntimeError: If a request to hanab.live did not yield a response
    """
    name = variant_name(var_id)
    if name is None:
        raise ValueError("{} is not a known variant_id.".format(var_id))

    # NOTE(review): presumably the fixed page size of the hanab.live API - confirm
    page_size = 100
    url = "variants/{}".format(var_id)
    r = api(url)
    if not r:
        raise RuntimeError("Failed to download request from hanab.live")

    num_entries = r.get('total_rows', None)
    if num_entries is None:
        raise ValueError("Unknown response format on hanab.live")

    # Count the stored games of this variant up to the recorded progress marker.
    # Without a progress row the subquery yields NULL, the comparison matches no
    # game and the count is 0, i.e. the download starts at the first page.
    cur.execute(
        "SELECT COUNT(*) FROM games WHERE variant_id = %s AND id <= "
        "(SELECT COALESCE (last_game_id, 0) FROM variant_game_downloads WHERE variant_id = %s)",
        (var_id, var_id)
    )
    num_already_downloaded_games = cur.fetchone()[0]
    next_page = num_already_downloaded_games // page_size
    last_page = (num_entries - 1) // page_size

    if num_already_downloaded_games == num_entries:
        print("Already downloaded all games for variant {} [{}]".format(var_id, name))
        return
    print(
        "Downloading remaining {} (total {}) entries for variant {} [{}]".format(
            num_entries - num_already_downloaded_games, num_entries, var_id, name
        )
    )

    for page in range(next_page, last_page + 1):
        # NOTE(review): 'col[0]=0' presumably requests a sort by game id; resuming by
        # page number relies on a stable order between runs - confirm against the API
        r = api(url + "?col[0]=0&page={}".format(page))
        if not r:
            # Same failure mode as for the initial request, instead of failing on None.get
            raise RuntimeError("Failed to download request from hanab.live")
        rows = r.get('rows', [])
        assert page == last_page or len(rows) == page_size, \
            "Received unexpected row count ({}) when querying page {}".format(len(rows), page)
        if not rows:
            # Only possible on the last page: there is nothing to store and no last
            # row whose id could be recorded as progress
            break
        for row in rows:
            process_game_row(row, var_id)
        cur.execute(
            "INSERT INTO variant_game_downloads (variant_id, last_game_id) VALUES"
            "(%s, %s)"
            "ON CONFLICT (variant_id) DO UPDATE SET last_game_id = EXCLUDED.last_game_id",
            (var_id, rows[-1]['id'])
        )
        # Commit per page so that an interrupted run keeps the pages already processed
        conn.commit()
        print('Downloaded and processed page {}'.format(page))