From 4c943bd382c97a8ea7923967f9459993505dfc2c Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 16 Sep 2021 17:37:39 -0700 Subject: [PATCH 1/9] Added missing imports to example code --- doc_src/manual.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc_src/manual.rst b/doc_src/manual.rst index 4106a01..5c18811 100644 --- a/doc_src/manual.rst +++ b/doc_src/manual.rst @@ -11,6 +11,8 @@ We can then import fANOVA and start it by typing >>> from fanova import fANOVA >>> import csv + >>> import os + >>> import numpy as np >>> path = os.path.dirname(os.path.realpath(__file__)) >>> X = np.loadtxt(path + '/example_data/online_lda/online_lda_features.csv', delimiter=",") >>> Y = np.loadtxt(path + '/example_data/online_lda/online_lda_responses.csv', delimiter=",") From 7ab2b66c1f5f2a0662bdcb73fd0f6861b1d9cd79 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 16 Sep 2021 17:57:12 -0700 Subject: [PATCH 2/9] Updated example code with correct parameter name and actual output --- doc_src/manual.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/doc_src/manual.rst b/doc_src/manual.rst index 5c18811..382e0f3 100644 --- a/doc_src/manual.rst +++ b/doc_src/manual.rst @@ -23,12 +23,18 @@ This creates a new fANOVA object and fits the Random Forest on the specified dat To compute now the marginal of the first parameter type: >>> f.quantify_importance((0, )) - 0.075414122571199116 + {(0,): {'individual importance': 0.07567390839783641, + 'total importance': 0.07567390839783641, + 'individual std': 0.020053764191788233, + 'total std': 0.020053764191788233}} fANOVA also allows to specify parameters by their names. 
- >>> f.quantify_importance(("Col0", )) - 0.075414122571199116 + >>> f.quantify_importance(("x_000", )) + {('x_000',): {'individual importance': 0.07567390839783641, + 'total importance': 0.07567390839783641, + 'individual std': 0.020053764191788233, + 'total std': 0.020053764191788233}} Advanced From b087c725aee30364f5731920d5471e768f090d14 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 20 Sep 2021 13:19:32 -0700 Subject: [PATCH 3/9] Fixed example code (n arg in get_most_important_pairwise_marginals must be kwarg) --- doc_src/manual.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc_src/manual.rst b/doc_src/manual.rst index 382e0f3..adf559a 100644 --- a/doc_src/manual.rst +++ b/doc_src/manual.rst @@ -56,9 +56,9 @@ You can also specify the number of trees in the random forest as well as the min More functions -------------- - * **f.get_most_important_pairwise_marginals(n)** + * **f.get_most_important_pairwise_marginals(n=N)** - Returns the **n** most important pairwise marginals + Returns the **N** most important pairwise marginals * **f.get_most_important_pairwise_marginals(params)** From 6f5d7c5787e69178152c036834d2a20d136985ae Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 21 Sep 2021 10:52:56 -0700 Subject: [PATCH 4/9] Styled code example as code not plain text --- doc_src/manual.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc_src/manual.rst b/doc_src/manual.rst index adf559a..3d5919a 100644 --- a/doc_src/manual.rst +++ b/doc_src/manual.rst @@ -128,7 +128,6 @@ You will also find an extra directory in your specified plot directory called 'i How to load a CSV-file -------------------------- -import numpy as np - -data = np.loadtxt('your_file.csv', delimiter=",") +>>> import numpy as np +>>> data = np.loadtxt('your_file.csv', delimiter=",") From 58a47adedaef8198729d97a3f7ba121a61e19e1b Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 21 Sep 2021 10:55:04 -0700 Subject: [PATCH 5/9] Added indentation to code 
example --- doc_src/manual.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc_src/manual.rst b/doc_src/manual.rst index 3d5919a..d107cba 100644 --- a/doc_src/manual.rst +++ b/doc_src/manual.rst @@ -128,6 +128,6 @@ You will also find an extra directory in your specified plot directory called 'i How to load a CSV-file -------------------------- ->>> import numpy as np ->>> data = np.loadtxt('your_file.csv', delimiter=",") + >>> import numpy as np + >>> data = np.loadtxt('your_file.csv', delimiter=",") From 20fe826e3657b2d8e1569add0169a8b7cfc61307 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 21 Sep 2021 10:59:58 -0700 Subject: [PATCH 6/9] Fixed broken image URL (pairwise.png) --- doc_src/manual.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc_src/manual.rst b/doc_src/manual.rst index d107cba..8c65bd1 100644 --- a/doc_src/manual.rst +++ b/doc_src/manual.rst @@ -99,7 +99,7 @@ The same can been done for pairwise marginals >>> vis.plot_pairwise_marginal([0,1]) -.. image:: ../examples/example_data/online_lda/figure2.png +.. image:: ../examples/example_data/online_lda/pairwise.png If you are just interested in the N most important pairwise marginals you can plot them through: From 8d1040f40a06614d5ddc9994a4da5399e7fbf1d0 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 21 Sep 2021 11:54:43 -0700 Subject: [PATCH 7/9] Link to data was broken. Changed to repo URL. --- doc_src/manual.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc_src/manual.rst b/doc_src/manual.rst index 8c65bd1..15639c0 100644 --- a/doc_src/manual.rst +++ b/doc_src/manual.rst @@ -6,7 +6,7 @@ Manual Quick Start ----------- -To run the examples, just download the `data `_ and start the python console. +To run the examples, download the data from the `GitHub repository `_ and start the Python console. 
We can then import fANOVA and start it by typing >>> from fanova import fANOVA @@ -129,5 +129,12 @@ How to load a CSV-file -------------------------- >>> import numpy as np - >>> data = np.loadtxt('your_file.csv', delimiter=",") + >>> X = np.loadtxt('your_file.csv', delimiter=",") +Alternatively, pandas may be used: + + >>> import pandas as pd + >>> df = pd.read_csv('your_file.csv') + >>> X = df[your_param_columns] + >>> Y = df[your_score_column] + >>> f = fANOVA(X, Y, config_space=cs) From c92cc9911bf2578f242738542520c91046d9636e Mon Sep 17 00:00:00 2001 From: Matt Dirks Date: Thu, 7 Oct 2021 13:09:45 -0700 Subject: [PATCH 8/9] Added `ax` argument to plot_marginal function to allow custom figures and returns the generated plot to allow further modifications by the caller. --- fanova/visualizer.py | 51 ++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/fanova/visualizer.py b/fanova/visualizer.py index 3484012..c919bd7 100644 --- a/fanova/visualizer.py +++ b/fanova/visualizer.py @@ -299,7 +299,7 @@ def generate_marginal(self, p, resolution=100): std = np.sqrt(v) return mean, std - def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumbents=None): + def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumbents=None, ax=None): """ Creates a plot of marginal of a selected parameter @@ -315,9 +315,23 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb whether to call plt.show() to show plot directly as interactive matplotlib-plot incumbents: List[Configuration] list of ConfigSpace.Configurations that are marked as incumbents + ax: AxesSubplot, optional + A matplotlib AxesSubplot in which to place the plot or, if None, a new figure will be created. + + Returns + ------- + ax: AxesSubplot + A matplotlib AxesSubplot containing the plot. To save it to disk use `ax.get_figure().savefig('filename.png')`. 
""" param, param_name, param_idx = self._get_parameter(param) + # get figure Axes to plot on (or make an empty one) + if (ax is None): + # create empty figure to work with + fig, ax = plt.subplots(1, 1) + else: + fig = ax.get_figure() + # check if categorical if isinstance(param, NumericalHyperparameter): # PREPROCESS @@ -335,10 +349,10 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb if log_scale: if np.diff(grid).std() > 0.000001: self.logger.info("It might be better to plot this parameter '%s' in log-scale.", param_name) - plt.semilogx(grid, mean, 'b', label='predicted %s' % self._y_label) + ax.semilogx(grid, mean, 'b', label='predicted %s' % self._y_label) else: - plt.plot(grid, mean, 'b', label='predicted %s' % self._y_label) - plt.fill_between(grid, upper_curve, lower_curve, facecolor='red', alpha=0.6, label='std') + ax.plot(grid, mean, 'b', label='predicted %s' % self._y_label) + ax.fill_between(grid, upper_curve, lower_curve, facecolor='red', alpha=0.6, label='std') if incumbents is not None: if not isinstance(incumbents, list): @@ -346,15 +360,15 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb values = [inc[param_name] for inc in incumbents if param_name in inc and inc[param_name] is not None] indices = [(np.abs(np.asarray(grid) - val)).argmin() for val in values] if len(indices) > 0: - plt.scatter(list([grid[idx] for idx in indices]), + ax.scatter(list([grid[idx] for idx in indices]), list([mean[idx] for idx in indices]), label='incumbent', c='black', marker='.', zorder=999) - plt.xlabel(param_name) - plt.ylabel(self._y_label) - plt.grid(True) - plt.legend() - plt.tight_layout() + ax.set_xlabel(param_name) + ax.set_ylabel(self._y_label) + ax.grid(True) + ax.legend() + fig.tight_layout() else: # PREPROCESS @@ -376,8 +390,8 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb max_y = mean[0] # PLOT - b = plt.boxplot([[x] for x in mean]) - 
plt.xticks(indices, labels) + b = ax.boxplot([[x] for x in mean]) + ax.set_xticks(indices, labels) # blow up boxes for box, std_ in zip(b["boxes"], std): y = box.get_ydata() @@ -388,16 +402,17 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb min_y = min(min_y, y[0] - std_) max_y = max(max_y, y[2] + std_) - plt.ylim([min_y, max_y]) + ax.set_ylim([min_y, max_y]) - plt.ylabel(self._y_label) - plt.xlabel(param_name) - plt.tight_layout() + ax.set_ylabel(self._y_label) + ax.set_xlabel(param_name) + fig.tight_layout() if show: plt.show() - else: - return plt + + # Always return the matplotlib plot (to allow users to save it etc) + return ax def create_most_important_pairwise_marginal_plots(self, params=None, n=20, three_d=True, resolution=20): """ From 5b656de6e5a910c71d419a53203f60cc98a091a8 Mon Sep 17 00:00:00 2001 From: Matt Dirks Date: Thu, 7 Oct 2021 13:10:22 -0700 Subject: [PATCH 9/9] Previously, plot_marginal would suggest using log scale, even when log_scale=True already; fixed this. --- fanova/visualizer.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fanova/visualizer.py b/fanova/visualizer.py index c919bd7..e58452e 100644 --- a/fanova/visualizer.py +++ b/fanova/visualizer.py @@ -310,7 +310,7 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb resolution: int Number of samples to generate from the parameter range as values to predict log_scale: boolean - If log scale is required or not. If no value is given, it is deduced from the ConfigSpace provided + Whether to plot using log scale or not. If no value is given, it is deduced from the ConfigSpace provided and from values. 
show: boolean whether to call plt.show() to show plot directly as interactive matplotlib-plot incumbents: List[Configuration] @@ -325,13 +325,16 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb """ param, param_name, param_idx = self._get_parameter(param) - # get figure Axes to plot on (or make an empty one) + # get figure AxesSubplot to plot on (or make a new one) if (ax is None): # create empty figure to work with - fig, ax = plt.subplots(1, 1) + fig, ax = plt.subplots(1) else: fig = ax.get_figure() + # don't show the figure when user has provided their own figure AxesSubplot + show = False + # check if categorical if isinstance(param, NumericalHyperparameter): # PREPROCESS @@ -342,13 +345,19 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb lower_curve = mean - std upper_curve = mean + std + + # auto-detect whether to do log-scale if log_scale is None: - log_scale = param.log or (np.diff(grid).std() > 0.000001) + # take log value from ConfigSpace + log_scale = param.log + + # auto-detect if log-scale might be better + if not log_scale and (np.diff(grid).std() > 0.000001): + self.logger.info("Plotting this parameter, %s, in log-scale because auto-detected that it might be better." % param_name) + log_scale = True # PLOT if log_scale: - if np.diff(grid).std() > 0.000001: - self.logger.info("It might be better to plot this parameter '%s' in log-scale.", param_name) ax.semilogx(grid, mean, 'b', label='predicted %s' % self._y_label) else: ax.plot(grid, mean, 'b', label='predicted %s' % self._y_label)