diff --git a/user_scanner/email_scan/news/nytimes.py b/user_scanner/email_scan/news/nytimes.py index d55d7a0a..1393cef3 100644 --- a/user_scanner/email_scan/news/nytimes.py +++ b/user_scanner/email_scan/news/nytimes.py @@ -7,25 +7,37 @@ async def _check(email: str) -> Result: show_url = "https://nytimes.com" - + # hit this first to wake up the session and grab the token login_url = "https://myaccount.nytimes.com/auth/enter-email?response_type=cookie&client_id=vi&redirect_uri=https%3A%2F%2Fwww.nytimes.com" check_url = "https://myaccount.nytimes.com/svc/lire_ui/authorize-email/check" headers = { - 'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Mobile Safari/537.36", - 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + 'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Mobile Safari/537.36", + 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8", 'Accept-Language': "en-US,en;q=0.9", + 'Accept-Encoding': "identity", + 'sec-ch-ua-platform': '"Android"', + 'sec-ch-ua': '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"', + 'sec-ch-ua-mobile': "?1" } try: - async with httpx.AsyncClient(timeout=7.0, follow_redirects=True) as client: + # NYT likes HTTP/2, helps avoid getting flagged as a bot + async with httpx.AsyncClient(timeout=12.0, follow_redirects=True, http2=True) as client: + init_res = await client.get(login_url, headers=headers) + if init_res.status_code == 403: + return Result.error("NYT blocked the initial hit (403)") + + # Digging out the auth_token from the mess of HTML/JS token_match = re.search( - r'authToken(?:"|"):(?:"|")([^&"]+)', init_res.text) + r'authToken(?:"|"|\\")\s*:\s*(?:"|"|\\")([^&"\\]+)', + init_res.text + ) if not token_match: - return Result.error("Could not extract NYT auth_token") + return Result.error("Couldn't find the auth_token in the page") auth_token = html.unescape(token_match.group(1)) @@ -37,32 +49,44 @@ async def _check(email: str) -> Result: "environment": "production" } - # Update headers for the API call + # The critical tracking/origin headers api_headers = headers.copy() api_headers.update({ 'Content-Type': "application/json", 'Accept': "application/json", 'req-details': "[[it:lui]]", 'Origin': "https://myaccount.nytimes.com", - 'Referer': login_url + 'Referer': login_url, + 'sec-fetch-site': "same-origin", + 'sec-fetch-mode': "cors", + 'sec-fetch-dest': "empty" }) - response = await client.post(check_url, content=json.dumps(payload), headers=api_headers) - data = response.json() + response = await client.post( + check_url, + content=json.dumps(payload), + headers=api_headers + ) + + if response.status_code == 403: + return Result.error("Bot detection triggered on the check (403)") + + if response.status_code != 200: + return Result.error(f"API acted up: {response.status_code}") - further_action = data.get("data", {}).get("further_action", "") + res_data = response.json() + further_action = res_data.get("data", {}).get("further_action", "") + # If it says show-login, they have an account. If show-register, they don't. if further_action == "show-login": return Result.taken(url=show_url) elif further_action == "show-register": return Result.available(url=show_url) - return Result.error("Unexpected response body, report it on github") + return Result.error(f"Got an weird action: {further_action}") - except httpx.ConnectTimeout: - return Result.error("Connection timed out!") except httpx.ReadTimeout: - return Result.error("Server took too long to respond (Read Timeout)") + return Result.error("NYT took too long to answer") except Exception as e: return Result.error(e) diff --git a/user_scanner/email_scan/travel/polarsteps.py b/user_scanner/email_scan/travel/polarsteps.py index 449643b9..eafc5e75 100644 --- a/user_scanner/email_scan/travel/polarsteps.py +++ b/user_scanner/email_scan/travel/polarsteps.py @@ -1,50 +1,56 @@ import httpx +import json from user_scanner.core.result import Result async def _check(email: str) -> Result: show_url = "https://polarsteps.com" - url = "https://www.polarsteps.com/send_password_reset" + # Switching to the login endpoint to leverage 401 vs 404 status codes + url = "https://www.polarsteps.com/api/login" payload = { - 'email': email + "username": email, + "password": "nic3_guys_finish_last" # Dummy password for existence check } headers = { - 'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Mobile Safari/537.36", + 'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Mobile Safari/537.36", + 'Accept': "application/json, text/plain, */*", 'Accept-Encoding': "identity", + 'Content-Type': "application/json", 'sec-ch-ua-platform': '"Android"', - 'sec-ch-ua': '"Not:A-Brand";v="99", "Google Chrome";v="145", "Chromium";v="145"', + 'polarsteps-api-version': "69", + 'sec-ch-ua': '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"', 'sec-ch-ua-mobile': "?1", - 'origin': "https://www.polarsteps.com", - 'sec-fetch-site': "same-origin", - 'sec-fetch-mode': "cors", - 'sec-fetch-dest': "empty", - 'referer': "https://www.polarsteps.com/forgot_password", - 'accept-language': "en-US,en;q=0.9", - 'priority': "u=1, i" + 'Origin': "https://www.polarsteps.com", + 'Referer': "https://www.polarsteps.com/login", + 'Accept-Language': "en-US,en;q=0.9,ru;q=0.8", + 'Priority': "u=1, i" } try: async with httpx.AsyncClient(timeout=10.0) as client: - response = await client.post(url, data=payload, headers=headers) - status = response.status_code + response = await client.post( + url, + content=json.dumps(payload), + headers=headers + ) - if status == 403: - return Result.error("Caught by WAF or IP Block (403)") + status = response.status_code - if status == 200: - data = response.json() + # 401 means the account exists but the password (dummy) was wrong + if status == 401: + return Result.taken(url=show_url) - if data.get("success") == "OK": - return Result.taken(url=show_url) + # 404 means the username/email is not registered in their system + if status == 404: + return Result.available(url=show_url) - error_msg = data.get("error", {}).get("email", "") - if "don't have any user" in error_msg: - return Result.available(url=show_url) + if status == 403: + return Result.error("Caught by WAF or IP Block (403)") if status == 429: - return Result.error("Rate limited by Polarsteps") + return Result.error("Rate limited by Polarsteps (429)") return Result.error(f"Unexpected status code: {status}")