Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 39 additions & 15 deletions user_scanner/email_scan/news/nytimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,37 @@

async def _check(email: str) -> Result:
show_url = "https://nytimes.com"

# hit this first to wake up the session and grab the token
login_url = "https://myaccount.nytimes.com/auth/enter-email?response_type=cookie&client_id=vi&redirect_uri=https%3A%2F%2Fwww.nytimes.com"
check_url = "https://myaccount.nytimes.com/svc/lire_ui/authorize-email/check"

headers = {
'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Mobile Safari/537.36",
'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Mobile Safari/537.36",
'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
'Accept-Language': "en-US,en;q=0.9",
'Accept-Encoding': "identity",
'sec-ch-ua-platform': '"Android"',
'sec-ch-ua': '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"',
'sec-ch-ua-mobile': "?1"
}

try:
async with httpx.AsyncClient(timeout=7.0, follow_redirects=True) as client:
# NYT likes HTTP/2, helps avoid getting flagged as a bot
async with httpx.AsyncClient(timeout=12.0, follow_redirects=True, http2=True) as client:

init_res = await client.get(login_url, headers=headers)

if init_res.status_code == 403:
return Result.error("NYT blocked the initial hit (403)")

# Digging out the auth_token from the mess of HTML/JS
token_match = re.search(
r'authToken(?:"|"):(?:"|")([^&"]+)', init_res.text)
r'authToken(?:"|"|\\")\s*:\s*(?:"|"|\\")([^&"\\]+)',
init_res.text
)

if not token_match:
return Result.error("Could not extract NYT auth_token")
return Result.error("Couldn't find the auth_token in the page")

auth_token = html.unescape(token_match.group(1))

Expand All @@ -37,32 +49,44 @@ async def _check(email: str) -> Result:
"environment": "production"
}

# Update headers for the API call
# The critical tracking/origin headers
api_headers = headers.copy()
api_headers.update({
'Content-Type': "application/json",
'Accept': "application/json",
'req-details': "[[it:lui]]",
'Origin': "https://myaccount.nytimes.com",
'Referer': login_url
'Referer': login_url,
'sec-fetch-site': "same-origin",
'sec-fetch-mode': "cors",
'sec-fetch-dest': "empty"
})

response = await client.post(check_url, content=json.dumps(payload), headers=api_headers)
data = response.json()
response = await client.post(
check_url,
content=json.dumps(payload),
headers=api_headers
)

if response.status_code == 403:
return Result.error("Bot detection triggered on the check (403)")

if response.status_code != 200:
return Result.error(f"API acted up: {response.status_code}")

further_action = data.get("data", {}).get("further_action", "")
res_data = response.json()
further_action = res_data.get("data", {}).get("further_action", "")

# If it says show-login, they have an account. If show-register, they don't.
if further_action == "show-login":
return Result.taken(url=show_url)
elif further_action == "show-register":
return Result.available(url=show_url)

return Result.error("Unexpected response body, report it on github")
return Result.error(f"Got an weird action: {further_action}")

except httpx.ConnectTimeout:
return Result.error("Connection timed out!")
except httpx.ReadTimeout:
return Result.error("Server took too long to respond (Read Timeout)")
return Result.error("NYT took too long to answer")
except Exception as e:
return Result.error(e)

Expand Down
52 changes: 29 additions & 23 deletions user_scanner/email_scan/travel/polarsteps.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,56 @@
import httpx
import json
from user_scanner.core.result import Result


async def _check(email: str) -> Result:
show_url = "https://polarsteps.com"
url = "https://www.polarsteps.com/send_password_reset"
# Switching to the login endpoint to leverage 401 vs 404 status codes
url = "https://www.polarsteps.com/api/login"

payload = {
'email': email
"username": email,
"password": "nic3_guys_finish_last" # Dummy password for existence check
}

headers = {
'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Mobile Safari/537.36",
'User-Agent': "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Mobile Safari/537.36",
'Accept': "application/json, text/plain, */*",
'Accept-Encoding': "identity",
'Content-Type': "application/json",
'sec-ch-ua-platform': '"Android"',
'sec-ch-ua': '"Not:A-Brand";v="99", "Google Chrome";v="145", "Chromium";v="145"',
'polarsteps-api-version': "69",
'sec-ch-ua': '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"',
'sec-ch-ua-mobile': "?1",
'origin': "https://www.polarsteps.com",
'sec-fetch-site': "same-origin",
'sec-fetch-mode': "cors",
'sec-fetch-dest': "empty",
'referer': "https://www.polarsteps.com/forgot_password",
'accept-language': "en-US,en;q=0.9",
'priority': "u=1, i"
'Origin': "https://www.polarsteps.com",
'Referer': "https://www.polarsteps.com/login",
'Accept-Language': "en-US,en;q=0.9,ru;q=0.8",
'Priority': "u=1, i"
}

try:
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.post(url, data=payload, headers=headers)
status = response.status_code
response = await client.post(
url,
content=json.dumps(payload),
headers=headers
)

if status == 403:
return Result.error("Caught by WAF or IP Block (403)")
status = response.status_code

if status == 200:
data = response.json()
# 401 means the account exists but the password (dummy) was wrong
if status == 401:
return Result.taken(url=show_url)

if data.get("success") == "OK":
return Result.taken(url=show_url)
# 404 means the username/email is not registered in their system
if status == 404:
return Result.available(url=show_url)

error_msg = data.get("error", {}).get("email", "")
if "don't have any user" in error_msg:
return Result.available(url=show_url)
if status == 403:
return Result.error("Caught by WAF or IP Block (403)")

if status == 429:
return Result.error("Rate limited by Polarsteps")
return Result.error("Rate limited by Polarsteps (429)")

return Result.error(f"Unexpected status code: {status}")

Expand Down
Loading