Skip to content

Commit c3a42fd

Browse files
committed
Fix scraping for relaunched kvb website
1 parent 202f5a1 commit c3a42fd

File tree

1 file changed

+17
-13
lines changed

1 file changed

+17
-13
lines changed

server.py

+17 -13
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,10 @@
1717

1818
# URL templates fuer den Scraper
1919
URL_TEMPLATES = {
20-
"station_details": "/german/hst/overview/{station_id:d}/",
21-
"line_details": "/german/hst/showline/{station_id:d}/{line_id:d}/",
22-
"schedule_table": "/german/hst/aushang/{station_id:d}/",
23-
"schedule_pocket": "/german/hst/miniplan/{station_id:d}/",
20+
"station_details": "/haltestellen/overview/{station_id:d}/",
21+
"line_details": "/haltestellen/showline/{station_id:d}/{line_id:d}/",
22+
"schedule_table": "/haltestellen/aushang/{station_id:d}/",
23+
"schedule_pocket": "/haltestellen/miniplan/{station_id:d}/",
2424
"departures": "/qr/{station_id:d}/"
2525
}
2626

@@ -49,7 +49,7 @@ def get_stations():
4949
Ruft Liste aller Stationen ab und gibt
5050
Dict mit ID als Schlüssel und Name als Wert aus.
5151
"""
52-
url = "http://www.kvb-koeln.de/german/hst/overview/"
52+
url = "https://www.kvb.koeln/haltestellen/overview/"
5353
r = requests.get(url, headers=HEADERS)
5454
soup = BeautifulSoup(r.text)
5555
#print(soup.prettify())
@@ -80,15 +80,15 @@ def get_station_details(station_id):
8080
"""
8181
Liest Details zu einer Station.
8282
"""
83-
url = "http://www.kvb-koeln.de/german/hst/overview/%d/" % station_id
83+
url = "https://www.kvb.koeln/haltestellen/overview/%d/" % station_id
8484
r = requests.get(url, headers=HEADERS)
8585
soup = BeautifulSoup(r.text)
8686
details = {
8787
"station_id": station_id,
8888
"name": stations[station_id],
8989
"line_ids": set()
9090
}
91-
div = soup.find("div", class_="fliesstext")
91+
div = soup.find("ul", class_="info-list")
9292
for a in div.find_all("a"):
9393
href = a.get("href")
9494
if href is None:
@@ -107,7 +107,7 @@ def get_line_details(station_id, line_id):
107107
"""
108108
Findet heraus, welche Stationen eine Linie anfährt
109109
"""
110-
url = "http://www.kvb-koeln.de/german/hst/showline/%d/%d/" % (
110+
url = "https://www.kvb.koeln/haltestellen/showline/%d/%d/" % (
111111
station_id, line_id)
112112
r = requests.get(url, headers=HEADERS)
113113
soup = BeautifulSoup(r.text)
@@ -118,8 +118,14 @@ def get_line_details(station_id, line_id):
118118
"stations_reverse": []
119119
}
120120
station_key = "stations_forward"
121+
count = 0
121122
for td in soup.find_all("td", class_=re.compile(".*station")):
122123
tdclass = td.get("class")[0]
124+
if tdclass == u'station-top':
125+
count = count + 1
126+
if count == 2:
127+
station_key = "stations_reverse"
128+
123129
a = td.find("a")
124130
if a is None:
125131
continue
@@ -132,21 +138,19 @@ def get_line_details(station_id, line_id):
132138
if result is None:
133139
continue
134140
details[station_key].append(int(result["station_id"]))
135-
if tdclass == u'btstation':
136-
station_key = "stations_reverse"
137141
return details
138142

139143

140144
def get_departures(station_id):
141145
"""
142146
Aktuelle Abfahrten von einer Station laden
143147
"""
144-
url = "http://www.kvb-koeln.de/qr/%d/" % station_id
148+
url = "https://www.kvb.koeln/qr/%d/" % station_id
145149
r = requests.get(url, headers=HEADERS)
146150
soup = BeautifulSoup(r.text)
147-
tables = soup.find_all("table", class_="qr_table")
151+
tables = soup.find_all("table", class_="display")
148152
departures = []
149-
for row in tables[1].find_all("tr"):
153+
for row in tables[0].find_all("tr"):
150154
tds = row.find_all("td")
151155
(line_id, direction, time) = (tds[0].text, tds[1].text, tds[2].text)
152156
line_id = line_id.replace(u"\xa0", "")

0 commit comments

Comments
 (0)