Skip to content

Commit 1df0d3a

Browse files
committed
merge
2 parents e0f3ec5 + d54b66e commit 1df0d3a

File tree

6 files changed

+96
-111
lines changed

6 files changed

+96
-111
lines changed

notebooks/feature_engineering.ipynb

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,14 @@
188188
"id": "7bfd56a7",
189189
"metadata": {},
190190
"outputs": [
191+
{
192+
"name": "stderr",
193+
"output_type": "stream",
194+
"text": [
195+
"[GUIDE] Successfully performed generate_entityset.\n",
196+
"\tYou can perform the next step by calling generate_label_times.\n"
197+
]
198+
},
191199
{
192200
"data": {
193201
"text/plain": [
@@ -290,6 +298,14 @@
290298
"id": "e0ee16eb",
291299
"metadata": {},
292300
"outputs": [
301+
{
302+
"name": "stderr",
303+
"output_type": "stream",
304+
"text": [
305+
"[GUIDE] Successfully performed generate_label_times.\n",
306+
"\tYou can perform the next step by calling generate_feature_matrix.\n"
307+
]
308+
},
293309
{
294310
"data": {
295311
"text/html": [
@@ -458,7 +474,9 @@
458474
"/Users/raymondpan/zephyr/Zephyr-repo/venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3464: RuntimeWarning: Mean of empty slice.\n",
459475
" return _methods._mean(a, axis=axis, dtype=dtype,\n",
460476
"/Users/raymondpan/zephyr/Zephyr-repo/venv/lib/python3.8/site-packages/numpy/core/_methods.py:192: RuntimeWarning: invalid value encountered in scalar divide\n",
461-
" ret = ret.dtype.type(ret / rcount)\n"
477+
" ret = ret.dtype.type(ret / rcount)\n",
478+
"[GUIDE] Successfully performed generate_feature_matrix.\n",
479+
"\tYou can perform the next step by calling generate_train_test_split.\n"
462480
]
463481
}
464482
],
@@ -627,10 +645,10 @@
627645
" <Feature: MAX(pidata_processed.fft.mean.mean_value)>,\n",
628646
" <Feature: MIN(pidata_processed.fft.mean.mean_value)>,\n",
629647
" <Feature: SUM(pidata_processed.fft.mean.mean_value)>,\n",
630-
" <Feature: COUNT(alarms WHERE DES_NAME = Alarm2)>,\n",
631648
" <Feature: COUNT(alarms WHERE DES_NAME = Alarm1)>,\n",
632-
" <Feature: SUM(alarms.IND_DURATION WHERE DES_NAME = Alarm2)>,\n",
649+
" <Feature: COUNT(alarms WHERE DES_NAME = Alarm2)>,\n",
633650
" <Feature: SUM(alarms.IND_DURATION WHERE DES_NAME = Alarm1)>,\n",
651+
" <Feature: SUM(alarms.IND_DURATION WHERE DES_NAME = Alarm2)>,\n",
634652
" <Feature: MAX(stoppages.NUM_WORDS(DES_COMMENTS))>,\n",
635653
" <Feature: MAX(stoppages.NUM_WORDS(DES_DESCRIPTION))>,\n",
636654
" <Feature: MAX(stoppages.NUM_WORDS(DES_WO_NAME))>,\n",

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.0.5.dev0
2+
current_version = 0.0.5.dev2
33
commit = True
44
tag = True
55
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,6 @@
124124
test_suite='tests',
125125
tests_require=tests_require,
126126
url='https://github.com/sintel-dev/zephyr',
127-
version='0.0.5.dev0',
127+
version='0.0.5.dev2',
128128
zip_safe=False,
129129
)

tests/test_guide.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,11 @@
44
class DummyObject:
55
def __init__(self):
66
producers_and_getters = [
7-
([self.step0_key, self.step0_set], [self.step0_getter]),
8-
([self.step1_key, self.step1_set], [self.step1_getter]),
9-
([self.step2_key, self.step2_set], [self.step2_getter])
7+
([self.step0_key], [self.step0_set], [self.step0_getter]),
8+
([self.step1_key], [self.step1_set], [self.step1_getter]),
9+
([self.step2_key], [self.step2_set], [self.step2_getter])
1010
]
11-
set_methods = {
12-
self.step0_set.__name__,
13-
self.step1_set.__name__,
14-
self.step2_set.__name__
15-
}
16-
self._guide_handler = GuideHandler(producers_and_getters, set_methods)
11+
self._guide_handler = GuideHandler(producers_and_getters)
1712

1813
@guide
1914
def step0_key(self):

zephyr_ml/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
__author__ = 'MIT Data To AI Lab'
66
__email__ = '[email protected]'
7-
__version__ = '0.0.5.dev0'
7+
__version__ = '0.0.5.dev2'
88

99
import os
1010

zephyr_ml/core.py

Lines changed: 68 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -30,40 +30,37 @@
3030

3131
class GuideHandler:
3232

33-
def __init__(self, producers_and_getters, set_methods):
33+
def __init__(self, ordered_steps):
3434
self.cur_iteration = 0
3535
self.current_step = -1
3636
self.start_point = -1
37-
self.producers_and_getters = producers_and_getters
38-
self.set_methods = set_methods
37+
self.ordered_steps = ordered_steps
38+
self.set_methods = set()
3939

4040
self.producer_to_step_map = {}
4141
self.getter_to_step_map = {}
4242

4343
self.iterations = []
44-
for idx, (producers, getters) in enumerate(self.producers_and_getters):
44+
for idx, (keys, sets, gets) in enumerate(self.ordered_steps):
4545
self.iterations.append(-1)
4646

47-
for prod in producers:
47+
for prod in keys:
4848
self.producer_to_step_map[prod.__name__] = idx
49+
for prod in sets:
50+
self.producer_to_step_map[prod.__name__] = idx
51+
self.set_methods.add(prod.__name__)
4952

50-
for get in getters:
53+
for get in gets:
5154
self.getter_to_step_map[get.__name__] = idx
5255

53-
def get_necessary_steps(self, actual_next_step):
54-
step_strs = []
55-
for step in range(self.current_step, actual_next_step):
56-
option_strs = []
57-
for opt in self.producers_and_getters[step][0]:
58-
option_strs.append(opt.__name__)
59-
step_strs.append(f"{step}. {' or '.join(option_strs)}")
60-
return "\n".join(step_strs)
56+
def or_join(self, methods):
57+
return " or ".join([method.__name__ for method in methods])
6158

6259
def get_get_steps_in_between(self, cur_step, next_step):
6360
step_strs = []
6461
for step in range(cur_step + 1, next_step):
6562
step_strs.append(
66-
f"{step} {self.producers_and_getters[step][1][0]}")
63+
f"{step} {self.or_join(self.ordered_steps[step][2])}")
6764
return step_strs
6865

6966
def get_last_up_to_date(self, next_step):
@@ -80,23 +77,25 @@ def get_steps_in_between(self, cur_step, next_step):
8077
step_strs = []
8178
for step in range(cur_step + 1, next_step):
8279
option_strs = []
83-
for opt in self.producers_and_getters[step][0]:
84-
option_strs.append(opt.__name__)
85-
step_strs.append(f"{step}. {' or '.join(option_strs)}")
80+
option_strs.extend(self.ordered_steps[step][0])
81+
option_strs.extend(self.ordered_steps[step][1])
82+
step_strs.append(f"{step}. {self.or_join(option_strs)}")
8683
return step_strs
8784

8885
def log_next_producer_step(self, name):
8986
next_step = self.current_step + 1
9087

91-
if next_step >= len(self.producers_and_getters):
92-
LOGGER.warning("[GUIDE] You have reached the end of the \
93-
predictive engineering workflow.\
94-
You may continue to go back and reperform steps based on results.")
88+
if next_step >= len(self.ordered_steps):
89+
cur_step_name = self.or_join(self.ordered_steps[self.current_step][0])
90+
LOGGER.warning((f"[GUIDE] Successfully performed {name}.\n"
91+
f"\tYou have reached the end of the "
92+
f"predictive engineering workflow.\n"
93+
f"\tYou can call {cur_step_name} again or re-perform previous steps "
94+
f"based on results."))
9595
else:
96-
next_step_name = self.producers_and_getters[next_step][0][0].__name__
97-
LOGGER.warning(f"[GUIDE] Successfully performed {name}. You can perform the \
98-
next step by calling\
99-
{next_step_name}.")
96+
next_step_name = self.or_join(self.ordered_steps[next_step][0])
97+
LOGGER.warning(f"[GUIDE] Successfully performed {name}.\n"
98+
f"\tYou can perform the next step by calling {next_step_name}.")
10099

101100
def perform_producer_step(self, zephyr, method,
102101
*method_args, **method_kwargs):
@@ -137,21 +136,19 @@ def try_log_backwards_key_method_warning(self, name, next_step):
137136
f"{steps_in_between}"))
138137

139138
def log_get_inconsistent_warning(self, name, next_step):
140-
prod_steps_str = ' or '.join([method.__name__ for method in
141-
self.producers_and_getters[next_step][0]])
139+
prod_steps_str = self.or_join(self.ordered_steps[next_step][0])
142140
prod_steps = f"{next_step}.{prod_steps_str}"
143141
latest_up_to_date = self.get_last_up_to_date(next_step)
144-
LOGGER.warning(f"[GUIDE] INCONSISTENCY WARNING: Unable to perform {name} \
145-
because {prod_steps} has not \
146-
been run yet. Run steps starting at or before \
147-
{latest_up_to_date} ")
142+
LOGGER.warning((f"[GUIDE] INCONSISTENCY WARNING: Unable to perform {name} because"
143+
f"{prod_steps} has not been run yet.\n"
144+
f"Run steps starting at or before {latest_up_to_date}."))
148145

149146
def log_get_stale_warning(self, name, next_step):
150147
latest_up_to_date = self.get_last_up_to_date(next_step)
151-
LOGGER.warning(f"[GUIDE] STALE WARNING: Performing {name}. \
152-
This data is potentially stale. \
153-
Re-run steps starting at or before \
154-
{latest_up_to_date} to ensure data is up to date.")
148+
LOGGER.warning((f"[GUIDE] STALE WARNING: Performing {name}.\n"
149+
f"This data is potentially stale.\n"
150+
f"Re-run steps starting at or before {latest_up_to_date}"
151+
f"to ensure data is up to date."))
155152

156153
# tries to perform step if possible -> warns that data might be stale
157154

@@ -216,52 +213,36 @@ def try_perform_inconsistent_producer_step( # add using stale and overwriting
216213
# not up to date
217214
if (next_step >= self.current_step and
218215
self.iterations[next_step - 1] != self.cur_iteration):
219-
corr_set_method = self.producers_and_getters[next_step][0][1].__name__
216+
corr_set_method = self.or_join(self.ordered_steps[next_step][1])
220217
prev_step = next_step - 1
221-
prev_set_method = self.producers_and_getters[prev_step][0][1].__name__
222-
prev_key_method = self.producers_and_getters[prev_step][0][0].__name__
223-
LOGGER.warning(f"[GUIDE] INCONSISTENCY WARNING:Unable \
224-
to perform {name} because you are\
225-
performing a key method at\
226-
step {next_step} but the result of the previous step, \
227-
step {prev_step}, is STALE.\
228-
If you already have the data for step {next_step}, \
229-
you can use the corresponding set method: {corr_set_method}.\
230-
Otherwise, please perform step {prev_step} \
231-
with {prev_key_method} or {prev_set_method}.")
232-
# inconsistent backward step: performing set method at nonzero step
233-
# elif next_step < self.current_step and name in self.set_method:
234-
# first_set_method = self.producers_and_getters[0][0][1].__name__
235-
# corr_key_method = self.producers_and_getters[next_step][0][0].__name__
236-
# LOGGER.warning(f"Unable to perform {name} because you are going backwards \
237-
# and performing step {next_step} with a set method.\
238-
# You can only perform a backwards step with a set \
239-
# method at step 0: {first_set_method}.\
240-
# If you would like to perform step {next_step}, \
241-
# please use the corresponding key method: {corr_key_method}.")
242-
# inconsistent backward step: performing key method but previous step
243-
# is not up to date
218+
prev_set_method = self.or_join(self.ordered_steps[prev_step][1])
219+
prev_key_method = self.or_join(self.ordered_steps[prev_step][0])
220+
LOGGER.warning(f"""[GUIDE] INCONSISTENCY WARNING:Unable\
221+
to perform {name} because you are performing a key method at
222+
step {next_step} but the result of the previous step,
223+
step {prev_step}, is STALE.
224+
If you already have the data for step {next_step},
225+
you can use the corresponding set method: {corr_set_method}.
226+
Otherwise, please perform step {prev_step} with
227+
{prev_key_method} or {prev_set_method}.""")
244228
elif (next_step < self.current_step and
245229
self.iterations[next_step - 1] != self.cur_iteration):
246230
prev_step = next_step - 1
247-
prev_key_method = self.producers_and_getters[prev_step][0][0].__name__
248-
corr_set_method = self.producers_and_getters[next_step][0][1].__name__
249-
prev_get_method = self.producers_and_getters[prev_step][1][0].__name__
250-
prev_set_method = self.producers_and_getters[prev_step][0][1].__name__
251-
LOGGER.warning(f"[GUIDE] INCONSISTENCY WARNING: Unable to perform {name} \
252-
because you are going \
253-
backwards and starting a new iteration by\
254-
performing a key method at step {next_step} \
255-
but the result of the previous step,\
256-
step {prev_step}, is STALE.\
257-
If you want to use the STALE result of the PREVIOUS step, \
258-
you can call {prev_get_method} to get the data, then\
259-
{prev_set_method} to set the data, and then recall this method.\
260-
If you want to regenerate the data of the PREVIOUS step, \
261-
please call {prev_key_method}, and then recall this method.\
262-
If you already have the data for THIS step, you can \
263-
call {corr_set_method} to set the data.\
264-
")
231+
prev_key_method = self.or_join(self.ordered_steps[prev_step][0])
232+
corr_set_method = self.or_join(self.ordered_steps[next_step][1])
233+
prev_get_method = self.or_join(self.ordered_steps[prev_step][2])
234+
prev_set_method = self.or_join(self.ordered_steps[prev_step][1])
235+
LOGGER.warning(f"""[GUIDE] INCONSISTENCY WARNING: Unable to perform {name}
236+
because you are going backwards and starting a new iteration by
237+
performing a key method at step {next_step} but the result of the
238+
previous step, step {prev_step}, is STALE.
239+
If you want to use the STALE result of the PREVIOUS step,
240+
you can call {prev_get_method} to get the data, then
241+
{prev_set_method} to set the data, and then recall this method.
242+
If you want to regenerate the data of the PREVIOUS step,
243+
please call {prev_key_method}, and then recall this method.
244+
If you already have the data for THIS step, you can call
245+
{corr_set_method} to set the data.""")
265246

266247
def try_perform_getter_step(
267248
self, zephyr, method, *method_args, **method_kwargs):
@@ -344,24 +325,15 @@ def __init__(self):
344325

345326
# each step is a tuple of 3 lists: key (producer) methods, set methods, and getter methods
346327
step_order = [
347-
([
348-
self.generate_entityset, self.set_entityset], [
349-
self.get_entityset]), ([
350-
self.generate_label_times, self.set_label_times], [
351-
self.get_label_times]), ([
352-
self.generate_feature_matrix, self.set_feature_matrix], [
353-
self.get_feature_matrix]), ([
354-
self.generate_train_test_split, self.set_train_test_split], [
355-
self.get_train_test_split]), ([
356-
self.fit_pipeline, self.set_fitted_pipeline], [
357-
self.get_fitted_pipeline]), ([
358-
self.predict, self.evaluate], [])]
359-
set_methods = set([self.set_entityset.__name__,
360-
self.set_label_times.__name__,
361-
self.set_feature_matrix.__name__,
362-
self.set_train_test_split.__name__,
363-
self.set_fitted_pipeline.__name__])
364-
self._guide_handler = GuideHandler(step_order, set_methods)
328+
([self.generate_entityset], [self.set_entityset], [self.get_entityset]),
329+
([self.generate_label_times], [self.set_label_times], [self.get_label_times]),
330+
([self.generate_feature_matrix], [self.set_feature_matrix], [self.get_feature_matrix]),
331+
([self.generate_train_test_split], [self.set_train_test_split],
332+
[self.get_train_test_split]),
333+
([self.fit_pipeline], [self.set_fitted_pipeline], [self.get_fitted_pipeline]),
334+
([self.predict, self.evaluate], [], [])
335+
]
336+
self._guide_handler = GuideHandler(step_order)
365337

366338
def GET_ENTITYSET_TYPES(self):
367339
"""Get the supported entityset types and their required dataframes/columns.

0 commit comments

Comments
 (0)