diff --git a/main.py b/main.py index e7d42d5cf9..ad68af1e66 100644 --- a/main.py +++ b/main.py @@ -2,8 +2,9 @@ from datetime import datetime import json import os -import requests import argparse +import asyncio +import aiohttp def hash(text): hash_object = hashlib.md5(text.encode()) @@ -11,19 +12,8 @@ def hash(text): return md5_hash main_attributes = [ - "id", - "name", - "type", - "desc", - "atk", - "def", - "level", - "race", - "attribute", - "scale", - "archetype", - "linkval", - "linkmarkers", + "id", "name", "type", "desc", "atk", "def", "level", "race", + "attribute", "scale", "archetype", "linkval", "linkmarkers", ] def get_data_path(): @@ -38,62 +28,75 @@ def get_card_path(card_id): return card_path def process_card(card): - # Base info card_info = {attribute: card.get(attribute) for attribute in main_attributes} - # Images og_id = card_info["id"] images = card["card_images"] [main_images] = [image for image in images if image["id"] == og_id] - card_info.update( - { - "image_url": main_images["image_url"], - "image_url_small": main_images["image_url_small"], - "image_url_cropped": main_images["image_url_cropped"], - } - ) + card_info.update({ + "image_url": main_images["image_url"], + "image_url_small": main_images["image_url_small"], + "image_url_cropped": main_images["image_url_cropped"], + }) return card_info -def download_image(image_url, image_path): - # Check if file exists +async def download_image_async(session, image_url, image_path): if os.path.exists(image_path): return - print("\tDownloading " + image_url + " to " + image_path) - response = requests.get(image_url) - - if not response.ok or response.status_code != 200: - print("Failed to download image " + image_url) - print(response.text) - return - - with open(image_path, "wb") as f: - f.write(response.content) + print(f"\tDownloading {image_url} to {image_path}") + start_time = datetime.now() + + try: + async with session.get(image_url) as response: + end_time = datetime.now() + response_time = end_time - start_time + + if response_time.total_seconds() >= 1: + response_time_str = f"{response_time.total_seconds():.2f}s" + else: + response_time_str = f"{response_time.total_seconds() * 1000:.2f}ms" + + if response.status != 200: + print(f"Failed to download image {image_url}: {response.status} ({response_time_str})") + return + + content = await response.read() + with open(image_path, "wb") as f: + f.write(content) + + print(f"\tDownloaded {image_url} to {image_path} ({response_time_str})") + + except Exception as e: + print(f"Error downloading {image_url}: {str(e)}") -def download_images(card): +async def download_images_async(session, card): card_id = card['id'] image_dir = os.path.join(get_card_path(card_id), "images") os.makedirs(image_dir, exist_ok=True) - download_image(card['image_url'], os.path.join(image_dir, "full.jpg")) - download_image(card['image_url_small'], os.path.join(image_dir, "small.jpg")) - download_image(card['image_url_cropped'], os.path.join(image_dir, "cropped.jpg")) + + tasks = [ + download_image_async(session, card['image_url'], os.path.join(image_dir, "full.jpg")), + download_image_async(session, card['image_url_small'], os.path.join(image_dir, "small.jpg")), + download_image_async(session, card['image_url_cropped'], os.path.join(image_dir, "cropped.jpg")) + ] + await asyncio.gather(*tasks) def save_card_info(card): info_path = os.path.join(get_card_path(card['id']), "info.json") - print("\tSaving card info for " + card['name']) with open(info_path, "w") as w: - json.dump(card, w) + json.dump(card, w, indent=4) def store_hash(card): card_id = card['id'] - card_hash = hash(json.dumps(card)) + card_hash = hash(json.dumps(card, sort_keys=True)) hash_path = os.path.join(get_card_path(card_id), "hash.txt") with open(hash_path, "w") as w: w.write(card_hash) def compare_hash(card) -> bool: card_id = card['id'] - card_hash = hash(json.dumps(card)) + card_hash = hash(json.dumps(card, sort_keys=True)) hash_path = os.path.join(get_card_path(card_id), "hash.txt") if not os.path.exists(hash_path): return False @@ -101,9 +104,9 @@ def compare_hash(card) -> bool: old_hash = r.read() return card_hash == old_hash -def sync_card(card): +async def sync_card_async(session, card): save_card_info(card) - download_images(card) + await download_images_async(session, card) store_hash(card) def update_meta_json(start_time, end_time, cards_processed, total_cards, cards_added): @@ -126,54 +129,74 @@ def update_meta_json(start_time, end_time, cards_processed, total_cards, cards_a data.append(entry) with open(meta_file, "w") as f: - json.dump(data, f, indent=2) + json.dump(data, f, indent=4) + + print("Updated meta.json") + print("Time taken: " + str(end_time - start_time)) + print("Cards processed: " + str(cards_processed)) + print("Cards found: " + str(total_cards)) + print("Cards added: " + str(cards_added)) + +async def process_batch(session, batch, semaphore, total_processed, total_to_process): + async def process_single_card(card): + async with semaphore: + try: + card_info = process_card(card) + if not compare_hash(card_info): + await sync_card_async(session, card_info) + return True + except Exception as e: + print(f"Error processing card {card['name']}: {e}") + return False + + tasks = [asyncio.create_task(process_single_card(card)) for card in batch] + results = await asyncio.gather(*tasks) + cards_added = sum(results) + + total_processed += len(batch) + print(f"Processed {total_processed}/{total_to_process} cards. Added {cards_added} new cards.") + + return cards_added, total_processed -def main(): +async def main_async(): parser = argparse.ArgumentParser(description='Download Yu-Gi-Oh! card data') parser.add_argument('--card-count', type=int, help='Number of cards to download (optional)') - parser.add_argument('--card-id', type=int, help='The card id to sync (optional)') - parser.add_argument('--card-name', type=int, help='The card name to sync (optional)') + parser.add_argument('--card-id', type=int, help='The card id to sync (optional)') + parser.add_argument('--card-name', type=str, help='The card name to sync (optional)') + parser.add_argument('--batch-size', type=int, default=10, help='Number of cards to process in each batch') args = parser.parse_args() - response = requests.get("https://db.ygoprodeck.com/api/v7/cardinfo.php") - cardinfo_json = response.json() - - if args.card_count: - cards_to_process = args.card_count - else: - cards_to_process = len(cardinfo_json["data"]) + async with aiohttp.ClientSession() as session: + response = await session.get("https://db.ygoprodeck.com/api/v7/cardinfo.php") + cardinfo_json = await response.json() + + cards_to_process = min(args.card_count or len(cardinfo_json["data"]), len(cardinfo_json["data"])) + batch_size = args.batch_size - print(f"Processing {cards_to_process} cards...") + print(f"Processing {cards_to_process} cards in batches of {batch_size}...") start_time = datetime.now() - cards_processed = 0 cards_added = 0 - for card in cardinfo_json["data"]: - try: - print("Processing card " + card["name"] + " (" + str(cards_processed + 1) + "/" + str(cards_to_process) + ")") - card = process_card(card) - - if not compare_hash(card): - sync_card(card) - - cards_added += 1 - if (cards_added >= cards_to_process): - break - - cards_processed += 1 - - except Exception as e: - print("Error processing card " + card["name"] + ":", e) + total_processed = 0 + semaphore = asyncio.Semaphore(10) # Limit concurrent operations + + async with aiohttp.ClientSession() as session: + for i in range(0, cards_to_process, batch_size): + batch = cardinfo_json["data"][i:i+batch_size] + batch_added, total_processed = await process_batch(session, batch, semaphore, total_processed, cards_to_process) + cards_added += batch_added + + if total_processed >= cards_to_process: + break end_time = datetime.now() update_meta_json(start_time, end_time, - cards_processed, - cards_to_process, + total_processed, + len(cardinfo_json["data"]), cards_added) - if __name__ == "__main__": - main() \ No newline at end of file + asyncio.run(main_async()) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d80d9fc2a3..dcf586433c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ requests==2.32.3 +aiohttp>=3.8.4 \ No newline at end of file