|
| 1 | +import json |
| 2 | +import re |
| 3 | +import sys |
| 4 | + |
1 | 5 | import requests |
2 | 6 | from bs4 import BeautifulSoup |
3 | | - |
4 | 7 | from tabulate import tabulate |
5 | 8 |
|
6 | | -SOURCE_URL = "https://www.espncricinfo.com/scores/" |
# Cricbuzz page that lists the matches currently in progress.
LIVE_SCORES_URL = "https://www.cricbuzz.com/cricket-match/live-scores"

# Request headers sent with every fetch: a desktop Chrome User-Agent and an
# English Accept-Language (presumably so the site serves its regular English
# page to this script -- confirm against the live site).
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
}
| 17 | + |
| 18 | + |
def get_scores():
    """Fetch live cricket scores from Cricbuzz.

    Downloads ``LIVE_SCORES_URL`` and scans every ``<script>`` tag whose text
    mentions ``matchesList``. The first such script that actually yields
    matches wins; a script that mentions the key but produces nothing is
    skipped so that a later script can still supply the data.

    Returns:
        A list of match dicts as produced by ``_parse_matches_from_script``,
        or an empty list when no script yields any match.

    Raises:
        requests.RequestException: on connection problems, on timeout (10 s),
            or when ``raise_for_status`` rejects the response status.
    """
    response = requests.get(LIVE_SCORES_URL, headers=HEADERS, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Cricbuzz uses Next.js with embedded JSON data in script tags.
    for script in soup.find_all("script"):
        # ``script.string`` is None for tags without a single text child.
        text = script.string or ""
        if "matchesList" not in text:
            continue
        matches = _parse_matches_from_script(text)
        if matches:
            return matches
        # Key was mentioned but nothing parsed: keep looking in later scripts.

    return []
| 33 | + |
| 34 | + |
| 35 | +def _parse_matches_from_script(text): |
| 36 | + """Extract match data from the Next.js RSC script payload.""" |
| 37 | + # Unescape the embedded JSON strings |
| 38 | + unescaped = text.replace('\\"', '"').replace("\\n", "\n") |
| 39 | + |
| 40 | + matches = [] |
| 41 | + # Find all match JSON blocks: {"match":{"matchInfo":{...},"matchScore":{...}}} |
| 42 | + # We locate each matchInfo and extract the surrounding match object |
| 43 | + for m in re.finditer(r'"match":\{"matchInfo":\{', unescaped): |
| 44 | + start = m.start() - 1 # include the opening { |
| 45 | + match_data = _extract_json_object(unescaped, start) |
| 46 | + if match_data: |
| 47 | + parsed = _parse_match(match_data) |
| 48 | + if parsed: |
| 49 | + matches.append(parsed) |
| 50 | + |
| 51 | + # Deduplicate by matchId (data appears multiple times in RSC payload) |
| 52 | + seen = set() |
| 53 | + unique = [] |
| 54 | + for m in matches: |
| 55 | + mid = m.get("match_id") |
| 56 | + if mid and mid not in seen: |
| 57 | + seen.add(mid) |
| 58 | + unique.append(m) |
| 59 | + |
| 60 | + return unique |
| 61 | + |
| 62 | + |
| 63 | +def _extract_json_object(text, start): |
| 64 | + """Extract a balanced JSON object starting at position `start`.""" |
| 65 | + if text[start] != "{": |
| 66 | + return None |
| 67 | + depth = 0 |
| 68 | + for i in range(start, min(start + 5000, len(text))): |
| 69 | + if text[i] == "{": |
| 70 | + depth += 1 |
| 71 | + elif text[i] == "}": |
| 72 | + depth -= 1 |
| 73 | + if depth == 0: |
| 74 | + try: |
| 75 | + return json.loads(text[start : i + 1]) |
| 76 | + except json.JSONDecodeError: |
| 77 | + return None |
| 78 | + return None |
| 79 | + |
| 80 | + |
| 81 | +def _parse_match(data): |
| 82 | + """Parse a match dict into our display format.""" |
| 83 | + match = data.get("match", data) |
| 84 | + info = match.get("matchInfo", {}) |
| 85 | + score_data = match.get("matchScore", {}) |
| 86 | + |
| 87 | + team1_info = info.get("team1", {}) |
| 88 | + team2_info = info.get("team2", {}) |
| 89 | + |
| 90 | + if not team1_info or not team2_info: |
| 91 | + return None |
| 92 | + |
| 93 | + team1_score = _format_score(score_data.get("team1Score", {})) |
| 94 | + team2_score = _format_score(score_data.get("team2Score", {})) |
| 95 | + |
| 96 | + status = info.get("stateTitle", "") or info.get("state", "") |
| 97 | + |
| 98 | + return { |
| 99 | + "match_id": info.get("matchId"), |
| 100 | + "title": f"{info.get('seriesName', '')} - {info.get('matchDesc', '')}", |
| 101 | + "format": info.get("matchFormat", ""), |
| 102 | + "first_team": { |
| 103 | + "name": team1_info.get("teamSName", team1_info.get("teamName", "")), |
| 104 | + "score": team1_score, |
| 105 | + }, |
| 106 | + "second_team": { |
| 107 | + "name": team2_info.get("teamSName", team2_info.get("teamName", "")), |
| 108 | + "score": team2_score, |
| 109 | + }, |
| 110 | + "status": status, |
| 111 | + } |
| 112 | + |
| 113 | + |
| 114 | +def _format_score(team_score): |
| 115 | + """Format innings scores into a readable string.""" |
| 116 | + parts = [] |
| 117 | + for key in ("inngs1", "inngs2"): |
| 118 | + innings = team_score.get(key) |
| 119 | + if innings: |
| 120 | + runs = innings.get("runs", "") |
| 121 | + wickets = innings.get("wickets", "") |
| 122 | + overs = innings.get("overs", "") |
| 123 | + score_str = f"{runs}/{wickets}" |
| 124 | + if overs: |
| 125 | + score_str += f" ({overs} ov)" |
| 126 | + parts.append(score_str) |
| 127 | + return " & ".join(parts) |
7 | 128 |
|
8 | 129 |
|
def _print_scores(scores):
    """Print scores in a formatted table.

    Writes a ``fancy_grid`` table to stdout with one numbered row per match
    (teams, format and status), or a short notice when ``scores`` is empty.

    Args:
        scores: List of match dicts, each with ``first_team`` and
            ``second_team`` entries (``name`` / ``score``) and optional
            ``format`` and ``status``.
    """
    if not scores:
        print("No live matches at the moment.")
        return

    def _team_cell(team):
        # Team name, followed by the score only when one is available.
        label = team["name"]
        tally = team["score"]
        return label + f" {tally}" if tally else label

    rows = []
    for entry in scores:
        home, away = entry["first_team"], entry["second_team"]
        rows.append(
            [
                _team_cell(home),
                "vs",
                _team_cell(away),
                entry.get("format", ""),
                entry.get("status", ""),
            ]
        )

    column_titles = ["Team 1", "", "Team 2", "Format", "Status"]
    row_numbers = range(1, len(rows) + 1)  # number matches starting at 1
    rendered = tabulate(
        rows,
        headers=column_titles,
        showindex=row_numbers,
        tablefmt="fancy_grid",
    )
    print(rendered)
59 | 161 |
|
60 | | -def main(): |
61 | | - scores = get_scores() |
62 | | - _print_scores(scores) |
63 | 162 |
|
def main():
    """Fetch the live scores and print them.

    Any failure is reported as a one-line message on stderr and the process
    exits with status 1.
    """
    try:
        _print_scores(get_scores())
    except Exception as exc:
        # Checked in the same order as separate except clauses would be:
        # connection errors first, then timeouts, then HTTP status errors.
        if isinstance(exc, requests.ConnectionError):
            message = "Error: Could not connect. Check your internet connection."
        elif isinstance(exc, requests.Timeout):
            message = "Error: Request timed out."
        elif isinstance(exc, requests.HTTPError):
            message = f"Error: HTTP {exc.response.status_code}"
        else:
            message = f"Error: {exc}"
        print(message, file=sys.stderr)
        sys.exit(1)
0 commit comments