-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtable_builder.py
More file actions
85 lines (67 loc) · 3 KB
/
table_builder.py
File metadata and controls
85 lines (67 loc) · 3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from google.transit import gtfs_realtime_pb2
from python_src import nyct_subway_pb2
import os
import re
import pandas as pd
import datetime
import time
import logging
from collections import defaultdict
from app.utils import (time_of_day_from_unix_ts,
time_of_day_from_minutes,
day_of_week_from_datestamp,
timediff,
get_gtfs_sched,
datapath,
vehicle_stop_statuses,
schedule_relationships)
# Configure the root logger at import time so the INFO-level progress messages
# emitted while building the tables are actually shown.
logging.basicConfig(level = logging.INFO)
def grouped_entities(feed_message):
    """Yield ``(trip_update, vehicle)`` pairs from consecutive feed entities.

    The entities of *feed_message* are consumed two at a time: the
    ``trip_update`` attribute is taken from the first entity of each pair and
    the ``vehicle`` attribute from the second.

    Raises:
        ValueError: if the feed holds an odd number of entities.  Because this
            is a generator, the error surfaces on the first iteration rather
            than at call time.
    """
    entities = feed_message.entity
    if len(entities) % 2:
        raise ValueError('odd number of entities')
    # Walk the even indices; each even entity is paired with its successor.
    for idx in range(0, len(entities), 2):
        yield entities[idx].trip_update, entities[idx + 1].vehicle
# Month directory names (year followed by two-digit month) that build_table
# iterates over: August through December 2018.
months = [f'2018{mm}' for mm in ('08', '09', '10', '11', '12')]
def _record_stop_changes(feed_message, date, processed_schedule, prev_next_stops):
    """Fold one feed snapshot into the per-trip stop table (mutates both dicts).

    For every entity that carries a trip update whose trip_id ends in 'A..S'
    and that lists at least one stop time update, the "next stop" is taken to
    be the stop with the smallest ``(arrival.time, stop_id)`` pair.  When that
    stop differs from the one remembered for the trip, an arrival entry is
    recorded for the new stop and a departure entry for the remembered one.

    Args:
        feed_message: parsed ``FeedMessage`` for a single snapshot.
        date: name of the date directory; prefixed to the trip_id to form the
            row key in *processed_schedule*.
        processed_schedule: ``defaultdict(dict)`` mapping ``'<date>_<trip_id>'``
            to ``{(stop_id, 'arrival' | 'departure'): time_of_day}``.
        prev_next_stops: maps trip_id to the ``(arrival_time, stop_id)`` pair
            seen as next stop in the previous snapshot.
    """
    for e in feed_message.entity:
        if not (e.HasField('trip_update')
                and e.trip_update.trip.trip_id.endswith('A..S')
                and e.trip_update.stop_time_update):
            continue
        trip_id = e.trip_update.trip.trip_id
        # NOTE(review): a stop_time_update without an arrival presumably reports
        # arrival.time == 0 and would therefore win this min() -- confirm the
        # feed always populates arrival for the stops of interest.
        next_stop = min((stu.arrival.time, stu.stop_id) for stu in e.trip_update.stop_time_update)
        # First sighting of a trip: remember its next stop, nothing to record yet.
        prev_next_stop = prev_next_stops.setdefault(trip_id, next_stop)
        if next_stop[1] != prev_next_stop[1]:
            row = processed_schedule[date + '_' + trip_id]
            row[(next_stop[1], 'arrival')] = time_of_day_from_unix_ts(next_stop[0])
            row[(prev_next_stop[1], 'departure')] = time_of_day_from_unix_ts(prev_next_stop[0])
        # Refreshed on every snapshot so the departure entry uses the most
        # recent estimate for the stop being left.
        # NOTE(review): confirm this update is meant to be unconditional rather
        # than only when the next stop changes.
        prev_next_stops[trip_id] = next_stop


def build_table():
    """Build one CSV of observed stop times per date directory.

    Walks ``datapath + 'gtfs_realtime/<month>/<date>/'`` for every month in
    ``months``, reads each file whose second '_'-separated name component is
    'ace' in sorted filename order, and accumulates next-stop changes per
    trip.  For each date with at least one recorded trip, the result is
    written to ``~/ssd/trip_tables/<date>.csv`` with one row per
    ``'<date>_<trip_id>'`` and a two-level column index built from the
    ``(stop_id, 'arrival' | 'departure')`` keys.

    Files that fail to parse are logged and skipped; they contribute nothing
    to the table.
    """
    basepath = datapath + 'gtfs_realtime/'
    for month in months:
        month_path = basepath + month + '/'
        # Date directories are the entries without a '.' in their name.
        for date in sorted(fname for fname in os.listdir(month_path) if '.' not in fname):
            date_path = month_path + date + '/'
            processed_schedule = defaultdict(dict)
            prev_next_stops = {}
            for filename in sorted(os.listdir(date_path)):
                # Guard the index: stray files without '_' must not abort the run.
                name_parts = filename.split('_')
                if len(name_parts) < 2 or name_parts[1] != 'ace':
                    continue
                with open(date_path + filename, 'rb') as f:
                    content = f.read()
                feed_message = gtfs_realtime_pb2.FeedMessage()
                try:
                    feed_message.ParseFromString(content)
                except Exception as e:
                    # A failed parse can leave the message partially populated,
                    # so really skip the file instead of processing the remains.
                    logging.warning(f'skipping {filename}: {e}')
                    continue
                logging.info(f'opened {filename}')
                _record_stop_changes(feed_message, date, processed_schedule, prev_next_stops)
            if not processed_schedule:
                continue
            ps = ({'trip_id': trip_id, **d} for trip_id, d in processed_schedule.items())
            ps = pd.DataFrame(ps)
            ps = ps.set_index('trip_id').sort_index().sort_index(axis=1)
            # Columns are (stop_id, event) tuples; expose them as a two-level index.
            ps.columns = pd.MultiIndex.from_tuples(ps.columns)
            ps.to_csv(f'~/ssd/trip_tables/{date}.csv')
# Run the full table build only when executed as a script, not when imported.
if __name__ == "__main__":
    build_table()