Skip to content

Commit 593118b

Browse files
author
weilixu
committed
HTML parser module
1 parent 7ed236c commit 593118b

10 files changed

Lines changed: 47998 additions & 53 deletions

File tree

-221 Bytes
Binary file not shown.
531 Bytes
Binary file not shown.

BuildSimHubAPI/buildsimhub.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from BuildSimHubAPI.helpers import simulationJob
33
from BuildSimHubAPI.helpers import simulationType
44
from BuildSimHubAPI.helpers import energyModel
5-
from BuildSimHubAPI.helpers import htmlResults
65

76
class BuildSimHubAPIClient():
87
"""
@@ -40,6 +39,3 @@ def get_model(self, simulationJob):
4039
modelKey = vars(simulationJob)['_trackToken']
4140
model = energyModel.Model(self._userAPI,modelKey)
4241
return model
43-
44-
def get_html(self, simulationJob):
45-
html = htmlResults.HTMLResults(self._userAPI,simulaitonJob)

BuildSimHubAPI/eplusHTMLParser.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from BuildSimHubAPI.htmlParser import numericValueParser
2+
3+
def extract_a_value_from_table(html, report, table, column, row, category="EntireFacility"):
    """Look up a single table cell in an HTML report.

    A ``NumericValueParser`` is built from ``report``, ``table``, ``column``,
    ``row`` and ``category`` (in that order), fed the ``html`` text, and its
    results are packaged into a dictionary.

    Args:
        html: HTML text handed to the parser's ``feed`` method.
        report: Report name passed to the parser.
        table: Table name passed to the parser.
        column: Column name passed to the parser.
        row: Row name passed to the parser.
        category: Passed as the parser's fifth argument; defaults to
            ``"EntireFacility"``.

    Returns:
        A dict with the parser's ``data`` under ``'value'`` and the parser's
        ``unit`` under ``'unit'``.
    """
    parser = numericValueParser.NumericValueParser(report, table, column, row, category)
    parser.feed(html)

    value = parser.data
    unit = parser.unit
    return {'value': value, 'unit': unit}

BuildSimHubAPI/helpers/htmlResults.py

Lines changed: 0 additions & 46 deletions
This file was deleted.
Binary file not shown.
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
2+
import re
3+
from html.parser import HTMLParser
4+
5+
class NumericValueParser(HTMLParser):
    """Extract one cell, and the unit of its column, from a tagged HTML table.

    The target table is the ``<table>`` element whose ``tableid`` attribute
    equals ``"<report>:<reportFor>:<table>"``, where every non-word character
    has been removed from each of the three parts.  The first ``<tr>`` of that
    table is treated as the header row: the cell whose text (without a
    trailing ``[unit]`` suffix) equals ``column_name`` selects the column.
    In later rows, once a cell whose text equals ``row_name`` has been seen,
    the text of the cell at the selected column position is stored.

    After ``feed()``:
        * ``data`` holds the cell text (``''`` if nothing was found);
        * ``unit`` holds the bracketed unit from the header, e.g. ``'[C]'``
          (``''`` if the header carried no unit or the column was not found).

    Args:
        report: Report name, e.g. ``'Climatic Data Summary'``.
        table: Table name inside the report.
        column_name: Header text of the wanted column, without the unit.
        row_name: Text of the cell that identifies the wanted row.
        reportFor: Middle part of the table id; defaults to
            ``"EntireFacility"``.
    """

    # Tags that close one table cell; positions are counted per cell so that
    # empty cells do not shift the column alignment.
    _CELL_TAGS = ('td', 'th')

    def __init__(self, report, table, column_name, row_name, reportFor="EntireFacility"):
        super().__init__()

        # Raw string: '\W' in a normal literal is an invalid escape sequence.
        r = re.sub(r'\W', '', report)
        t = re.sub(r'\W', '', table)
        rf = re.sub(r'\W', '', reportFor)

        self._tableId = r + ":" + rf + ":" + t
        self._column = column_name
        self._row = row_name

        # Parser position flags.
        self._in_table = False
        self._in_header = False
        self._in_row = False
        self._correct_row = False
        # True once the requested column has been located in the header.
        self._column_found = False

        # Index of the wanted column / of the cell currently being read.
        self._col_index = 0
        self._current_col_index = 0
        self._unit = ''
        self._data = ''

    @property
    def data(self):
        """Text of the located cell, or ``''`` when nothing was found."""
        return self._data

    @property
    def unit(self):
        """Bracketed unit taken from the column header, or ``''``."""
        return self._unit

    def handle_starttag(self, tag, attributes):
        """Track entry into the target table and the start of each row."""
        if tag == 'table':
            if self._in_table:
                # Another <table> opened while reading the target table:
                # we are no longer in the table we want, so stop reading.
                self._in_table = False
                self._in_header = False
                return
            for name, value in attributes:
                if name == 'tableid' and value == self._tableId:
                    self._in_table = True
                    self._in_header = True
                    # Start the header scan from a clean state.
                    self._col_index = 0
                    self._column_found = False
                    break
        elif tag == 'tr':
            # A new row starts: restart the per-row cell counter.
            self._current_col_index = 0
            self._in_row = True

    def handle_endtag(self, tag):
        """Advance cell counters and close rows / the table."""
        if tag in self._CELL_TAGS:
            if self._in_header:
                # Every header cell closed before the match shifts the
                # wanted column one position to the right.
                if not self._column_found:
                    self._col_index += 1
            elif self._in_row:
                self._current_col_index += 1
        elif tag == 'tr':
            # Only the first row of the table is the header.
            self._in_header = False
            self._in_row = False
            # Leaving the row also ends any pending row match.
            self._correct_row = False
        elif tag == 'table':
            self._in_table = False
            self._in_header = False

    def handle_data(self, data):
        """Inspect cell text: match the header column, the row, then the value."""
        data = data.strip()

        # Ignore whitespace-only text and anything outside the target table.
        if not data or not self._in_table:
            return

        # Separate a trailing "[unit]" from the text.  Slicing at the bracket
        # and stripping works with or without a space before it, and also
        # when the text starts with the bracket.
        unit = ''
        index = data.find('[')
        if index > -1:
            unit = data[index:]
            data = data[:index].rstrip()

        if self._in_header:
            if not self._column_found and data == self._column:
                self._column_found = True
                self._unit = unit
            return

        # Without a located column there is no position to read from.
        if not (self._in_row and self._column_found):
            return

        if self._correct_row:
            if self._current_col_index == self._col_index:
                self._correct_row = False  # value captured, stop looking
                self._data = data
        elif data == self._row:
            # This row is the one we want; the value follows in a later cell.
            self._correct_row = True
110+
111+
###############LOCAL TEST########################
112+
#tableReader = NumericValueParser('Climatic Data Summary','Sizing Period Design Day','Maximum Dry Bulb','CHICAGO ANN HTG 99.6% CONDNS DB')
113+
#file = open('testTable.html', 'r')
114+
#tableReader.feed(file.read())
115+
#print(tableReader.data)
116+
#print(tableReader.unit)

0 commit comments

Comments
 (0)