Skip to content

Commit fad20ca

Browse files
committed
Revive project: switch to Cricbuzz, pixi, and GitHub Actions CI
- Replace broken ESPN Cricinfo HTML scraper with Cricbuzz embedded JSON parser
- Switch from setup.py/requirements.txt to pyproject.toml with pixi
- Replace Travis CI with GitHub Actions (Python 3.10-3.13 matrix)
- Add proper tests for score formatting, match parsing, and e2e
- Drop lxml dependency (use html.parser instead)
1 parent c8d8e97 commit fad20ca

8 files changed

Lines changed: 328 additions & 115 deletions

File tree

.github/workflows/test.yml

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,26 @@
1-
# This workflow will install Python dependencies, run tests and lint with a single version of Python
2-
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3-
41
name: Test
52

63
on:
74
push:
5+
branches: [master]
86
pull_request:
97

108
permissions:
119
contents: read
1210

1311
jobs:
14-
build:
15-
12+
test:
1613
runs-on: ubuntu-latest
14+
strategy:
15+
matrix:
16+
python-version: ["3.10", "3.11", "3.12", "3.13"]
1717

1818
steps:
19-
- uses: actions/checkout@v3
20-
- name: Set up Python 3.10
21-
uses: actions/setup-python@v3
22-
with:
23-
python-version: "3.10"
24-
- name: Install dependencies
25-
run: |
26-
python -m pip install --upgrade pip
27-
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28-
- name: Test
29-
run: |
30-
python -m unittest discover -v
19+
- uses: actions/checkout@v4
20+
21+
- uses: prefix-dev/setup-pixi@v0.8.1
22+
with:
23+
pixi-version: v0.46.0
24+
25+
- name: Run tests
26+
run: pixi run test

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,7 @@ ENV/
9090

9191
# Rope project settings
9292
.ropeproject
93+
94+
# Pixi
95+
.pixi/
96+
pixi.lock

.travis.yml

Lines changed: 0 additions & 10 deletions
This file was deleted.

criclive/main.py

Lines changed: 167 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,181 @@
1+
import json
2+
import re
3+
import sys
4+
15
import requests
26
from bs4 import BeautifulSoup
3-
47
from tabulate import tabulate
58

6-
SOURCE_URL = "https://www.espncricinfo.com/scores/"
9+
LIVE_SCORES_URL = "https://www.cricbuzz.com/cricket-match/live-scores"
10+
HEADERS = {
11+
"User-Agent": (
12+
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
13+
"(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
14+
),
15+
"Accept-Language": "en-US,en;q=0.9",
16+
}
17+
18+
19+
def get_scores():
    """Fetch live cricket scores from Cricbuzz.

    Downloads the live-scores page and scans its ``<script>`` tags for the
    embedded payload that mentions ``matchesList``.

    Returns:
        The list of match dicts produced by ``_parse_matches_from_script``
        for the first script that yields at least one match, or an empty
        list when no script does.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status (via ``raise_for_status``).
    """
    response = requests.get(LIVE_SCORES_URL, headers=HEADERS, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Cricbuzz uses Next.js with embedded JSON data in script tags
    for script in soup.find_all("script"):
        text = script.string or ""
        if "matchesList" not in text:
            continue
        matches = _parse_matches_from_script(text)
        # A script can mention matchesList without holding a parsable match;
        # keep scanning instead of giving up on the first hit.
        if matches:
            return matches

    return []
33+
34+
35+
def _parse_matches_from_script(text):
    """Extract match data from the Next.js RSC script payload.

    Args:
        text: Contents of a ``<script>`` tag in which the match JSON is
            embedded with backslash-escaped quotes and newlines.

    Returns:
        A list of parsed match dicts in order of first appearance, with at
        most one entry per ``match_id``. Matches that have no truthy
        ``match_id`` are dropped because they cannot be de-duplicated.
    """
    # Unescape the embedded JSON strings
    unescaped = text.replace('\\"', '"').replace("\\n", "\n")

    seen = set()
    unique = []
    # Match blocks look like {"match":{"matchInfo":{...},"matchScore":{...}}}.
    # Locate each "match" key and parse the object that encloses it.
    for found in re.finditer(r'"match":\{"matchInfo":\{', unescaped):
        start = found.start() - 1  # step back onto the opening {
        if start < 0:
            # A hit at index 0 has no enclosing brace; -1 would silently
            # wrap around to the last character of the payload.
            continue
        match_data = _extract_json_object(unescaped, start)
        if not match_data:
            continue
        parsed = _parse_match(match_data)
        if not parsed:
            continue
        # Deduplicate by matchId (data appears multiple times in RSC payload)
        mid = parsed.get("match_id")
        if mid and mid not in seen:
            seen.add(mid)
            unique.append(parsed)

    return unique
61+
62+
63+
def _extract_json_object(text, start):
64+
"""Extract a balanced JSON object starting at position `start`."""
65+
if text[start] != "{":
66+
return None
67+
depth = 0
68+
for i in range(start, min(start + 5000, len(text))):
69+
if text[i] == "{":
70+
depth += 1
71+
elif text[i] == "}":
72+
depth -= 1
73+
if depth == 0:
74+
try:
75+
return json.loads(text[start : i + 1])
76+
except json.JSONDecodeError:
77+
return None
78+
return None
79+
80+
81+
def _parse_match(data):
    """Parse a match dict into our display format.

    Args:
        data: Either ``{"match": {...}}`` or the inner match object itself,
            holding ``matchInfo`` and, optionally, ``matchScore``.

    Returns:
        A dict with the keys ``match_id``, ``title``, ``format``,
        ``first_team``, ``second_team`` and ``status``, or ``None`` when
        either team is missing from ``matchInfo``.
    """
    # `or {}` also covers keys that are present but null in the JSON.
    match = data.get("match", data) or {}
    info = match.get("matchInfo") or {}
    score_data = match.get("matchScore") or {}

    team1_info = info.get("team1") or {}
    team2_info = info.get("team2") or {}
    if not team1_info or not team2_info:
        return None

    def short_name(team):
        # Prefer the short name; fall back when it is missing or empty.
        return team.get("teamSName") or team.get("teamName") or ""

    # Join only the parts that exist so a missing series name or match
    # description does not leave a dangling " - ".
    title_parts = [
        str(part)
        for part in (info.get("seriesName"), info.get("matchDesc"))
        if part
    ]

    return {
        "match_id": info.get("matchId"),
        "title": " - ".join(title_parts),
        "format": info.get("matchFormat") or "",
        "first_team": {
            "name": short_name(team1_info),
            "score": _format_score(score_data.get("team1Score") or {}),
        },
        "second_team": {
            "name": short_name(team2_info),
            "score": _format_score(score_data.get("team2Score") or {}),
        },
        "status": info.get("stateTitle") or info.get("state") or "",
    }
112+
113+
114+
def _format_score(team_score):
115+
"""Format innings scores into a readable string."""
116+
parts = []
117+
for key in ("inngs1", "inngs2"):
118+
innings = team_score.get(key)
119+
if innings:
120+
runs = innings.get("runs", "")
121+
wickets = innings.get("wickets", "")
122+
overs = innings.get("overs", "")
123+
score_str = f"{runs}/{wickets}"
124+
if overs:
125+
score_str += f" ({overs} ov)"
126+
parts.append(score_str)
127+
return " & ".join(parts)
7128

8129

9130
def _print_scores(scores):
10-
table = [
11-
[
12-
f"{score['first_team']['name']} {score['first_team']['score']}",
13-
f"{score['second_team']['name']} {score['second_team']['score']}"
14-
]
15-
for score in scores
16-
]
17-
table_len = len(table)
18-
print(tabulate(
19-
table,
20-
showindex=range(1, table_len + 1),
21-
tablefmt='fancy_grid')
22-
)
131+
"""Print scores in a formatted table."""
# scores: list of dicts, each with "first_team" and "second_team" entries
# (holding "name" and "score") and optional "status" / "format" keys.
# Output goes to stdout; nothing is returned.
132+
# Nothing to tabulate: print a notice instead of an empty grid.
if not scores:
133+
print("No live matches at the moment.")
134+
return
23135

136+
table = []
137+
for score in scores:
138+
first = score["first_team"]
139+
second = score["second_team"]
140+
status = score.get("status", "")
141+
fmt = score.get("format", "")
24142

25-
def get_scores():
26-
html = requests.get(SOURCE_URL).text
27-
soup = BeautifulSoup(html, "lxml")
28-
competitors = soup.find_all('ul', attrs={'class': 'cscore_competitors'})
29-
30-
scores = []
31-
32-
for each_competitor in competitors:
33-
team_scores = list(map(_extract_score, each_competitor.find_all('div', {'class': 'cscore_score'})))
34-
team_names = list(map(lambda x: x.contents[0], each_competitor.find_all('span', {'class': 'cscore_name--long'})))
35-
36-
scores.append(
37-
{
38-
'first_team': {
39-
'name': team_names[0],
40-
'score': team_scores[0]
41-
},
42-
'second_team': {
43-
'name': team_names[1],
44-
'score': team_scores[1]
45-
},
46-
}
47-
)
48-
return scores
143+
# Append the score to the team name only when the score is non-empty.
first_col = first["name"]
144+
if first["score"]:
145+
first_col += f" {first['score']}"
49146

147+
second_col = second["name"]
148+
if second["score"]:
149+
second_col += f" {second['score']}"
50150

51-
def _extract_score(score_soup):
52-
score_string = score_soup.contents
53-
if len(score_string) == 2:
54-
score, overs = score_string
55-
overs_content = overs.contents
56-
score_string = [f'{score} {overs_content[0]}']
57-
return score_string
151+
table.append([first_col, "vs", second_col, fmt, status])
58152

153+
print(
154+
tabulate(
155+
table,
156+
headers=["Team 1", "", "Team 2", "Format", "Status"],
157+
# Number the rows starting from 1.
showindex=range(1, len(table) + 1),
158+
tablefmt="fancy_grid",
159+
)
160+
)
59161

60-
def main():
61-
scores = get_scores()
62-
_print_scores(scores)
63162

163+
def main():
    """Command-line entry point: fetch the live scores and print them.

    On any failure a one-line error message is written to stderr and the
    process exits with status 1.
    """
    try:
        scores = get_scores()
        _print_scores(scores)
    except requests.Timeout:
        # Checked before ConnectionError: requests.ConnectTimeout derives
        # from both, and "timed out" is the more accurate message for it.
        print("Error: Request timed out.", file=sys.stderr)
        sys.exit(1)
    except requests.ConnectionError:
        print(
            "Error: Could not connect. Check your internet connection.",
            file=sys.stderr,
        )
        sys.exit(1)
    except requests.HTTPError as e:
        # HTTPError can be raised without a response attached.
        detail = e.response.status_code if e.response is not None else e
        print(f"Error: HTTP {detail}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything unexpected and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

pyproject.toml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
[project]
2+
name = "criclive"
3+
version = "0.3"
4+
description = "Live Cricket Scores in Command Line."
5+
authors = [{ name = "Amit Kumar", email = "dtu.amit@gmail.com" }]
6+
license = { text = "MIT" }
7+
keywords = ["Cricket", "score", "scores", "cli", "live"]
8+
readme = "README.md"
9+
requires-python = ">=3.10"
10+
dependencies = [
11+
"beautifulsoup4>=4.12",
12+
"requests>=2.31",
13+
"tabulate>=0.9",
14+
]
15+
16+
[project.urls]
17+
Homepage = "https://github.com/aktech/criclive"
18+
19+
[project.scripts]
20+
criclive = "criclive.main:main"
21+
22+
[build-system]
23+
requires = ["hatchling"]
24+
build-backend = "hatchling.build"
25+
26+
[tool.pixi.workspace]
27+
channels = ["conda-forge"]
28+
platforms = ["linux-64", "linux-aarch64", "osx-arm64", "osx-64"]
29+
30+
[tool.pixi.pypi-dependencies]
31+
criclive = { path = ".", editable = true }
32+
33+
[tool.pixi.tasks]
34+
start = "criclive"
35+
test = "python -m pytest tests.py"
36+
37+
[tool.pixi.dependencies]
38+
python = ">=3.10"
39+
pytest = ">=8"

requirements.txt

Lines changed: 0 additions & 4 deletions
This file was deleted.

setup.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

0 commit comments

Comments
 (0)