|
8 | 8 |
|
9 | 9 | find_cuny_hours = re.compile(r'(\d{1,2}(:\d\d)?)\s*-\s*((\d{1,2}(:\d\d)?)\s*(([AP]M)|NOON))')
|
10 | 10 |
|
11 |
| -if __name__ == "__main__": |
12 |
| - |
13 |
| - soup = BeautifulSoup(open("citycollege/registrar.html")) |
14 |
| - |
15 |
| - starts = [] |
16 |
| - ends = [] |
17 |
| - |
18 |
| - coursetables = soup.findAll("table", {"class":"coursetable"}) |
19 |
| - for course in coursetables: |
20 |
| - classtimes = course.findAll("td", text=re.compile(r'\d{1,2}:\d\d')) |
21 |
| - for c in classtimes: |
22 |
| - m = find_cuny_hours.search(c) |
23 |
| - if m: |
24 |
| - x = m.groups() |
25 |
| - start = x[0] |
26 |
| - end = x[2] |
27 |
| - starts.append(start) |
28 |
| - ends.append(end) |
29 |
| - print start,end |
30 |
| - else: |
31 |
| - print "No match found" |
32 |
| - print c |
33 |
| - print "found %(count)d total class times" % { "count": len(starts) } |
34 |
| - |
35 |
| - # dividing up the classes we've found into different timeslots to count them |
36 |
| - timeslots = defaultdict(int) |
37 |
| - for i in range(len(starts)): |
38 |
| - timeslots[starts[i]+"-"+ends[i]] += 1 |
39 |
| - |
40 |
| - print(sorted(timeslots.items(), key=lambda x:x[1], reverse=True)[:10]) |
| 11 | +soup = BeautifulSoup(open("citycollege/registrar.html")) |
| 12 | + |
| 13 | +starts = [] |
| 14 | +ends = [] |
| 15 | + |
| 16 | +coursetables = soup.findAll("table", {"class":"coursetable"}) |
| 17 | +for course in coursetables: |
| 18 | + classtimes = course.findAll("td", text=re.compile(r'\d{1,2}:\d\d')) |
| 19 | + for c in classtimes: |
| 20 | + m = find_cuny_hours.search(c) |
| 21 | + if m: |
| 22 | + x = m.groups() |
| 23 | + start = x[0] |
| 24 | + end = x[2] |
| 25 | + starts.append(start) |
| 26 | + ends.append(end) |
| 27 | + print start,end |
| 28 | + else: |
| 29 | + print "No match found" |
| 30 | + print c |
| 31 | +print "found %(count)d total class times" % { "count": len(starts) } |
| 32 | + |
| 33 | +# dividing up the classes we've found into different timeslots to count them |
| 34 | +timeslots = defaultdict(int) |
| 35 | +for i in range(len(starts)): |
| 36 | + timeslots[starts[i]+"-"+ends[i]] += 1 |
| 37 | + |
| 38 | +print(sorted(timeslots.items(), key=lambda x:x[1], reverse=True)[:10]) |
0 commit comments