Skip to content

Commit

Permalink
Async operations
Browse files Browse the repository at this point in the history
  • Loading branch information
CodeFatherG committed Aug 18, 2024
1 parent 9d01969 commit 871033a
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 78 deletions.
179 changes: 101 additions & 78 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,18 @@
from datetime import datetime
import json
import os
import requests
import argparse
import asyncio
import aiohttp

def hash(text):
hash_object = hashlib.md5(text.encode())
md5_hash = hash_object.hexdigest()
return md5_hash

main_attributes = [
"id",
"name",
"type",
"desc",
"atk",
"def",
"level",
"race",
"attribute",
"scale",
"archetype",
"linkval",
"linkmarkers",
"id", "name", "type", "desc", "atk", "def", "level", "race",
"attribute", "scale", "archetype", "linkval", "linkmarkers",
]

def get_data_path():
Expand All @@ -38,72 +28,85 @@ def get_card_path(card_id):
return card_path

def process_card(card):
# Base info
card_info = {attribute: card.get(attribute) for attribute in main_attributes}
# Images
og_id = card_info["id"]
images = card["card_images"]
[main_images] = [image for image in images if image["id"] == og_id]
card_info.update(
{
"image_url": main_images["image_url"],
"image_url_small": main_images["image_url_small"],
"image_url_cropped": main_images["image_url_cropped"],
}
)
card_info.update({
"image_url": main_images["image_url"],
"image_url_small": main_images["image_url_small"],
"image_url_cropped": main_images["image_url_cropped"],
})
return card_info

def download_image(image_url, image_path):
# Check if file exists
async def download_image_async(session, image_url, image_path):
if os.path.exists(image_path):
return

print("\tDownloading " + image_url + " to " + image_path)
response = requests.get(image_url)

if not response.ok or response.status_code != 200:
print("Failed to download image " + image_url)
print(response.text)
return

with open(image_path, "wb") as f:
f.write(response.content)
print(f"\tDownloading {image_url} to {image_path}")
start_time = datetime.now()

try:
async with session.get(image_url) as response:
end_time = datetime.now()
response_time = end_time - start_time

if response_time.total_seconds() >= 1:
response_time_str = f"{response_time.total_seconds():.2f}s"
else:
response_time_str = f"{response_time.total_seconds() * 1000:.2f}ms"

if response.status != 200:
print(f"Failed to download image {image_url}: {response.status} ({response_time_str})")
return

content = await response.read()
with open(image_path, "wb") as f:
f.write(content)

print(f"\tDownloaded {image_url} to {image_path} ({response_time_str})")

except Exception as e:
print(f"Error downloading {image_url}: {str(e)}")

def download_images(card):
async def download_images_async(session, card):
card_id = card['id']
image_dir = os.path.join(get_card_path(card_id), "images")
os.makedirs(image_dir, exist_ok=True)
download_image(card['image_url'], os.path.join(image_dir, "full.jpg"))
download_image(card['image_url_small'], os.path.join(image_dir, "small.jpg"))
download_image(card['image_url_cropped'], os.path.join(image_dir, "cropped.jpg"))

tasks = [
download_image_async(session, card['image_url'], os.path.join(image_dir, "full.jpg")),
download_image_async(session, card['image_url_small'], os.path.join(image_dir, "small.jpg")),
download_image_async(session, card['image_url_cropped'], os.path.join(image_dir, "cropped.jpg"))
]
await asyncio.gather(*tasks)

def save_card_info(card):
info_path = os.path.join(get_card_path(card['id']), "info.json")

print("\tSaving card info for " + card['name'])
with open(info_path, "w") as w:
json.dump(card, w)
json.dump(card, w, indent=4)

def store_hash(card):
card_id = card['id']
card_hash = hash(json.dumps(card))
card_hash = hash(json.dumps(card, sort_keys=True))
hash_path = os.path.join(get_card_path(card_id), "hash.txt")
with open(hash_path, "w") as w:
w.write(card_hash)

def compare_hash(card) -> bool:
card_id = card['id']
card_hash = hash(json.dumps(card))
card_hash = hash(json.dumps(card, sort_keys=True))
hash_path = os.path.join(get_card_path(card_id), "hash.txt")
if not os.path.exists(hash_path):
return False
with open(hash_path, "r") as r:
old_hash = r.read()
return card_hash == old_hash

def sync_card(card):
async def sync_card_async(session, card):
save_card_info(card)
download_images(card)
await download_images_async(session, card)
store_hash(card)

def update_meta_json(start_time, end_time, cards_processed, total_cards, cards_added):
Expand All @@ -126,54 +129,74 @@ def update_meta_json(start_time, end_time, cards_processed, total_cards, cards_a
data.append(entry)

with open(meta_file, "w") as f:
json.dump(data, f, indent=2)
json.dump(data, f, indent=4)

print("Updated meta.json")
print("Time taken: " + str(end_time - start_time))
print("Cards processed: " + str(cards_processed))
print("Cards found: " + str(total_cards))
print("Cards added: " + str(cards_added))

async def process_batch(session, batch, semaphore, total_processed, total_to_process):
async def process_single_card(card):
async with semaphore:
try:
card_info = process_card(card)
if not compare_hash(card_info):
await sync_card_async(session, card_info)
return True
except Exception as e:
print(f"Error processing card {card['name']}: {e}")
return False

tasks = [asyncio.create_task(process_single_card(card)) for card in batch]
results = await asyncio.gather(*tasks)
cards_added = sum(results)

total_processed += len(batch)
print(f"Processed {total_processed}/{total_to_process} cards. Added {cards_added} new cards.")

return cards_added, total_processed

def main():
async def main_async():
parser = argparse.ArgumentParser(description='Download Yu-Gi-Oh! card data')
parser.add_argument('--card-count', type=int, help='Number of cards to download (optional)')
parser.add_argument('--card-id', type=int, help='The card id to sync (optional)')
parser.add_argument('--card-name', type=int, help='The card name to sync (optional)')
parser.add_argument('--card-id', type=int, help='The card id to sync (optional)')
parser.add_argument('--card-name', type=str, help='The card name to sync (optional)')
parser.add_argument('--batch-size', type=int, default=10, help='Number of cards to process in each batch')
args = parser.parse_args()

response = requests.get("https://db.ygoprodeck.com/api/v7/cardinfo.php")
cardinfo_json = response.json()

if args.card_count:
cards_to_process = args.card_count
else:
cards_to_process = len(cardinfo_json["data"])
async with aiohttp.ClientSession() as session:
response = await session.get("https://db.ygoprodeck.com/api/v7/cardinfo.php")
cardinfo_json = await response.json()

cards_to_process = min(args.card_count or len(cardinfo_json["data"]), len(cardinfo_json["data"]))
batch_size = args.batch_size

print(f"Processing {cards_to_process} cards...")
print(f"Processing {cards_to_process} cards in batches of {batch_size}...")

start_time = datetime.now()

cards_processed = 0
cards_added = 0
for card in cardinfo_json["data"]:
try:
print("Processing card " + card["name"] + " (" + str(cards_processed + 1) + "/" + str(cards_to_process) + ")")
card = process_card(card)

if not compare_hash(card):
sync_card(card)

cards_added += 1
if (cards_added >= cards_to_process):
break

cards_processed += 1

except Exception as e:
print("Error processing card " + card["name"] + ":", e)
total_processed = 0
semaphore = asyncio.Semaphore(10) # Limit concurrent operations

async with aiohttp.ClientSession() as session:
for i in range(0, cards_to_process, batch_size):
batch = cardinfo_json["data"][i:i+batch_size]
batch_added, total_processed = await process_batch(session, batch, semaphore, total_processed, cards_to_process)
cards_added += batch_added

if total_processed >= cards_to_process:
break

end_time = datetime.now()

update_meta_json(start_time,
end_time,
cards_processed,
cards_to_process,
total_processed,
len(cardinfo_json["data"]),
cards_added)


if __name__ == "__main__":
main()
asyncio.run(main_async())
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
requests==2.32.3
aiohttp>=3.8.4

0 comments on commit 871033a

Please sign in to comment.