From 7242fcadb096de8b756dad5b43160abf7a540d04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= <jindrichbar@gmail.com>
Date: Mon, 1 Dec 2025 09:33:00 +0100
Subject: [PATCH 1/2] feat: make Apify integration asynchronous

---
 engines/apify_api.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/engines/apify_api.py b/engines/apify_api.py
index 1bcf6f9..35bb63c 100644
--- a/engines/apify_api.py
+++ b/engines/apify_api.py
@@ -1,8 +1,8 @@
 import os
 try:
-    from apify_client import ApifyClient  # type: ignore
+    from apify_client import ApifyClientAsync  # type: ignore
 except Exception:  # pragma: no cover - allow discovery without the dependency installed
-    ApifyClient = None  # type: ignore[assignment]
+    ApifyClientAsync = None  # type: ignore[assignment]
 from .base import Scraper, ScrapeResult
 from dotenv import load_dotenv
 import logging
@@ -18,23 +18,23 @@ class ApifyAPIScraper(Scraper):
     """
     def __init__(self):
         self.api_token = os.getenv("APIFY_API_TOKEN")
-        if ApifyClient is None:
+        if ApifyClientAsync is None:
             # Keep import-time lightweight so discovery works; fail when actually used
             raise RuntimeError("apify-client is not installed. Please `pip install apify-client`. ")
         if not self.api_token:
             raise RuntimeError("APIFY_API_TOKEN environment variable not set.")
-        self.client = ApifyClient(self.api_token)
+        self.client = ApifyClientAsync(self.api_token)
         self.actor_id = "apify/web-scraper"
 
-    def scrape(self, url: str, run_id: str) -> ScrapeResult:
+    async def scrape(self, url: str, run_id: str) -> ScrapeResult:
         error = None
         html = ""
         content_size = 0
-        status_code = 500 
+        status_code = 500
         try:
             # Start the actor and wait for it to finish
             actor_client = self.client.actor(self.actor_id)
-            run_result = actor_client.call(
+            run_result = await actor_client.call(
                 run_input={
                     "startUrls": [{"url": url}],
                     "maxRequestsPerCrawl": 1,
@@ -68,7 +68,7 @@ def scrape(self, url: str, run_id: str) -> ScrapeResult:
                     error = "No HTML found in Apify dataset result."
         except Exception as e:
             error = str(e)
-        
+
         return ScrapeResult(
             run_id=run_id,
             scraper="apify_api",
@@ -79,4 +79,4 @@ def scrape(self, url: str, run_id: str) -> ScrapeResult:
             format="html",
             created_at=datetime.now().isoformat(),
             content=html,
-        ) 
+        )

From 5233a546e01b23a40bf893969e96098859e8d076 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= <jindrichbar@gmail.com>
Date: Mon, 1 Dec 2025 09:50:54 +0100
Subject: [PATCH 2/2] fix: load dataset items asynchronously

---
 engines/apify_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/engines/apify_api.py b/engines/apify_api.py
index 35bb63c..e7c1a03 100644
--- a/engines/apify_api.py
+++ b/engines/apify_api.py
@@ -59,7 +59,7 @@ async def scrape(self, url: str, run_id: str) -> ScrapeResult:
             else:
                 dataset_id = run_result["defaultDatasetId"]
                 dataset_client = self.client.dataset(dataset_id)
-                items = dataset_client.list_items().items
+                items = (await dataset_client.list_items()).items
                 if items and "html" in items[0]:
                     html = items[0]["html"] or ""
                     status_code = items[0].get("status_code")