From 4c943bd382c97a8ea7923967f9459993505dfc2c Mon Sep 17 00:00:00 2001
From: Matt <md4000000@gmail.com>
Date: Thu, 16 Sep 2021 17:37:39 -0700
Subject: [PATCH 1/9] Added missing imports to example code

---
 doc_src/manual.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc_src/manual.rst b/doc_src/manual.rst
index 4106a01..5c18811 100644
--- a/doc_src/manual.rst
+++ b/doc_src/manual.rst
@@ -11,6 +11,8 @@ We can then import fANOVA and start it by typing
 
     >>> from fanova import fANOVA
     >>> import csv
+    >>> import os
+    >>> import numpy as np
     >>> path = os.path.dirname(os.path.realpath(__file__))
     >>> X = np.loadtxt(path + '/example_data/online_lda/online_lda_features.csv', delimiter=",")
     >>> Y = np.loadtxt(path + '/example_data/online_lda/online_lda_responses.csv', delimiter=",")

From 7ab2b66c1f5f2a0662bdcb73fd0f6861b1d9cd79 Mon Sep 17 00:00:00 2001
From: Matt <md4000000@gmail.com>
Date: Thu, 16 Sep 2021 17:57:12 -0700
Subject: [PATCH 2/9] Updated example code with correct parameter name and
 actual output

---
 doc_src/manual.rst | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/doc_src/manual.rst b/doc_src/manual.rst
index 5c18811..382e0f3 100644
--- a/doc_src/manual.rst
+++ b/doc_src/manual.rst
@@ -23,12 +23,18 @@ This creates a new fANOVA object and fits the Random Forest on the specified dat
 To compute now the marginal of the first parameter type:
 
     >>> f.quantify_importance((0, ))
-        0.075414122571199116
+        {(0,): {'individual importance': 0.07567390839783641,
+        'total importance': 0.07567390839783641,
+        'individual std': 0.020053764191788233,
+        'total std': 0.020053764191788233}}
 
 fANOVA also allows to specify parameters by their names.
 
-    >>> f.quantify_importance(("Col0", ))
-    	0.075414122571199116
+    >>> f.quantify_importance(("x_000", ))
+        {('x_000',): {'individual importance': 0.07567390839783641,
+        'total importance': 0.07567390839783641,
+        'individual std': 0.020053764191788233,
+        'total std': 0.020053764191788233}}
 
 
 Advanced

From b087c725aee30364f5731920d5471e768f090d14 Mon Sep 17 00:00:00 2001
From: Matt <md4000000@gmail.com>
Date: Mon, 20 Sep 2021 13:19:32 -0700
Subject: [PATCH 3/9] Fixed example code (n arg in
 get_most_important_pairwise_marginals must be kwarg)

---
 doc_src/manual.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc_src/manual.rst b/doc_src/manual.rst
index 382e0f3..adf559a 100644
--- a/doc_src/manual.rst
+++ b/doc_src/manual.rst
@@ -56,9 +56,9 @@ You can also specify the number of trees in the random forest as well as the min
 More functions
 --------------
 
-    * **f.get_most_important_pairwise_marginals(n)**
+    * **f.get_most_important_pairwise_marginals(n=N)**
 
-    Returns the **n** most important pairwise marginals
+    Returns the **N** most important pairwise marginals
 
     * **f.get_most_important_pairwise_marginals(params)**
 

From 6f5d7c5787e69178152c036834d2a20d136985ae Mon Sep 17 00:00:00 2001
From: Matt <md4000000@gmail.com>
Date: Tue, 21 Sep 2021 10:52:56 -0700
Subject: [PATCH 4/9] Styled code example as code not plain text

---
 doc_src/manual.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/doc_src/manual.rst b/doc_src/manual.rst
index adf559a..3d5919a 100644
--- a/doc_src/manual.rst
+++ b/doc_src/manual.rst
@@ -128,7 +128,6 @@ You will also find an extra directory in your specified plot directory called 'i
 How to load a CSV-file
 --------------------------
 
-import numpy as np
-
-data = np.loadtxt('your_file.csv', delimiter=",")
+>>> import numpy as np
+>>> data = np.loadtxt('your_file.csv', delimiter=",")
 

From 58a47adedaef8198729d97a3f7ba121a61e19e1b Mon Sep 17 00:00:00 2001
From: Matt <md4000000@gmail.com>
Date: Tue, 21 Sep 2021 10:55:04 -0700
Subject: [PATCH 5/9] Added indentation to code example

---
 doc_src/manual.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc_src/manual.rst b/doc_src/manual.rst
index 3d5919a..d107cba 100644
--- a/doc_src/manual.rst
+++ b/doc_src/manual.rst
@@ -128,6 +128,6 @@ You will also find an extra directory in your specified plot directory called 'i
 How to load a CSV-file
 --------------------------
 
->>> import numpy as np
->>> data = np.loadtxt('your_file.csv', delimiter=",")
+    >>> import numpy as np
+    >>> data = np.loadtxt('your_file.csv', delimiter=",")
 

From 20fe826e3657b2d8e1569add0169a8b7cfc61307 Mon Sep 17 00:00:00 2001
From: Matt <md4000000@gmail.com>
Date: Tue, 21 Sep 2021 10:59:58 -0700
Subject: [PATCH 6/9] Fixed broken image URL (pairwise.png)

---
 doc_src/manual.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc_src/manual.rst b/doc_src/manual.rst
index d107cba..8c65bd1 100644
--- a/doc_src/manual.rst
+++ b/doc_src/manual.rst
@@ -99,7 +99,7 @@ The same can been done for pairwise marginals
 
     >>> vis.plot_pairwise_marginal([0,1])
 
-.. image:: ../examples/example_data/online_lda/figure2.png
+.. image:: ../examples/example_data/online_lda/pairwise.png
 
 
 If you are just interested in the N most important pairwise marginals you can plot them through:

From 8d1040f40a06614d5ddc9994a4da5399e7fbf1d0 Mon Sep 17 00:00:00 2001
From: Matt <md4000000@gmail.com>
Date: Tue, 21 Sep 2021 11:54:43 -0700
Subject: [PATCH 7/9] Link to data was broken. Changed to repo URL.

---
 doc_src/manual.rst | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/doc_src/manual.rst b/doc_src/manual.rst
index 8c65bd1..15639c0 100644
--- a/doc_src/manual.rst
+++ b/doc_src/manual.rst
@@ -6,7 +6,7 @@ Manual
 
 Quick Start
 -----------
-To run the examples, just download the `data <https://github.com/automl/fanova/blob/master/fanova/example/online_lda.tar.gz>`_ and start the python console.
+To run the examples, download the data from the `GitHub repository <https://github.com/automl/fanova/tree/master/examples/example_data/online_lda>`_ and start the Python console.
 We can then import fANOVA and start it by typing
 
     >>> from fanova import fANOVA
@@ -129,5 +129,12 @@ How to load a CSV-file
 --------------------------
 
     >>> import numpy as np
-    >>> data = np.loadtxt('your_file.csv', delimiter=",")
+    >>> X = np.loadtxt('your_file.csv', delimiter=",")
 
+Alternatively, pandas may be used:
+
+    >>> import pandas as pd
+    >>> df = pd.read_csv('your_file.csv')
+    >>> X = df[your_param_columns]
+    >>> Y = df[your_score_column]
+    >>> f = fANOVA(X, Y, config_space=cs)

From c92cc9911bf2578f242738542520c91046d9636e Mon Sep 17 00:00:00 2001
From: Matt Dirks <matt@skylogic.ca>
Date: Thu, 7 Oct 2021 13:09:45 -0700
Subject: [PATCH 8/9] Added `ax` argument to plot_marginal to allow custom
 figures; plot_marginal now always returns the Axes (previously `plt`, and
 only when show=False) so the caller can modify the plot further.

---
 fanova/visualizer.py | 51 ++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/fanova/visualizer.py b/fanova/visualizer.py
index 3484012..c919bd7 100644
--- a/fanova/visualizer.py
+++ b/fanova/visualizer.py
@@ -299,7 +299,7 @@ def generate_marginal(self, p, resolution=100):
             std = np.sqrt(v)
             return mean, std
 
-    def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumbents=None):
+    def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumbents=None, ax=None):
         """
         Creates a plot of marginal of a selected parameter
 
@@ -315,9 +315,23 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
             whether to call plt.show() to show plot directly as interactive matplotlib-plot
         incumbents: List[Configuration]
             list of ConfigSpace.Configurations that are marked as incumbents
+        ax: AxesSubplot, optional
+            A matplotlib AxesSubplot to draw the plot on. If None, a new figure and AxesSubplot are created.
+
+        Returns
+        -------
+        ax: AxesSubplot
+            A matplotlib AxesSubplot containing the plot. To save it to disk use `ax.get_figure().savefig('filename.png')`.
         """
         param, param_name, param_idx = self._get_parameter(param)
 
+        # get figure Axes to plot on (or make an empty one)
+        if (ax is None):
+            # create empty figure to work with
+            fig, ax = plt.subplots(1, 1)
+        else:
+            fig = ax.get_figure()
+
         # check if categorical
         if isinstance(param, NumericalHyperparameter):
             # PREPROCESS
@@ -335,10 +349,10 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
             if log_scale:
                 if np.diff(grid).std() > 0.000001:
                     self.logger.info("It might be better to plot this parameter '%s' in log-scale.", param_name)
-                plt.semilogx(grid, mean, 'b', label='predicted %s' % self._y_label)
+                ax.semilogx(grid, mean, 'b', label='predicted %s' % self._y_label)
             else:
-                plt.plot(grid, mean, 'b', label='predicted %s' % self._y_label)
-            plt.fill_between(grid, upper_curve, lower_curve, facecolor='red', alpha=0.6, label='std')
+                ax.plot(grid, mean, 'b', label='predicted %s' % self._y_label)
+            ax.fill_between(grid, upper_curve, lower_curve, facecolor='red', alpha=0.6, label='std')
 
             if incumbents is not None:
                 if not isinstance(incumbents, list):
@@ -346,15 +360,15 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
                 values = [inc[param_name] for inc in incumbents if param_name in inc and inc[param_name] is not None]
                 indices = [(np.abs(np.asarray(grid) - val)).argmin() for val in values]
                 if len(indices) > 0:
-                    plt.scatter(list([grid[idx] for idx in indices]),
+                    ax.scatter(list([grid[idx] for idx in indices]),
                                 list([mean[idx] for idx in indices]),
                                 label='incumbent', c='black', marker='.', zorder=999)
 
-            plt.xlabel(param_name)
-            plt.ylabel(self._y_label)
-            plt.grid(True)
-            plt.legend()
-            plt.tight_layout()
+            ax.set_xlabel(param_name)
+            ax.set_ylabel(self._y_label)
+            ax.grid(True)
+            ax.legend()
+            fig.tight_layout()
 
         else:
             # PREPROCESS
@@ -376,8 +390,8 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
             max_y = mean[0]
 
             # PLOT
-            b = plt.boxplot([[x] for x in mean])
-            plt.xticks(indices, labels)
+            b = ax.boxplot([[x] for x in mean])
+            ax.set_xticks(indices, labels)
             # blow up boxes
             for box, std_ in zip(b["boxes"], std):
                 y = box.get_ydata()
@@ -388,16 +402,17 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
                 min_y = min(min_y, y[0] - std_)
                 max_y = max(max_y, y[2] + std_)
 
-            plt.ylim([min_y, max_y])
+            ax.set_ylim([min_y, max_y])
 
-            plt.ylabel(self._y_label)
-            plt.xlabel(param_name)
-            plt.tight_layout()
+            ax.set_ylabel(self._y_label)
+            ax.set_xlabel(param_name)
+            fig.tight_layout()
 
         if show:
             plt.show()
-        else:
-            return plt
+        
+        # Always return the matplotlib plot (to allow users to save it etc)
+        return ax
 
     def create_most_important_pairwise_marginal_plots(self, params=None, n=20, three_d=True, resolution=20):
         """

From 5b656de6e5a910c71d419a53203f60cc98a091a8 Mon Sep 17 00:00:00 2001
From: Matt Dirks <matt@skylogic.ca>
Date: Thu, 7 Oct 2021 13:10:22 -0700
Subject: [PATCH 9/9] Previously, plot_marginal would suggest using log scale,
 even when log_scale=True already; fixed this.

---
 fanova/visualizer.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/fanova/visualizer.py b/fanova/visualizer.py
index c919bd7..e58452e 100644
--- a/fanova/visualizer.py
+++ b/fanova/visualizer.py
@@ -310,7 +310,7 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
         resolution: int
             Number of samples to generate from the parameter range as values to predict
         log_scale: boolean
-            If log scale is required or not. If no value is given, it is deduced from the ConfigSpace provided
+            Whether to plot the x-axis in log scale. If None, the parameter's `log` setting from the ConfigSpace is used; if that is not set, log scale is still chosen when the sampled grid is not evenly spaced.
         show: boolean
             whether to call plt.show() to show plot directly as interactive matplotlib-plot
         incumbents: List[Configuration]
@@ -325,13 +325,16 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
         """
         param, param_name, param_idx = self._get_parameter(param)
 
-        # get figure Axes to plot on (or make an empty one)
+        # get figure AxesSubplot to plot on (or make a new one)
         if (ax is None):
             # create empty figure to work with
-            fig, ax = plt.subplots(1, 1)
+            fig, ax = plt.subplots(1)
         else:
             fig = ax.get_figure()
 
+            # the caller owns a user-supplied AxesSubplot, so never call plt.show() here (this overrides `show`)
+            show = False
+
         # check if categorical
         if isinstance(param, NumericalHyperparameter):
             # PREPROCESS
@@ -342,13 +345,19 @@ def plot_marginal(self, param, resolution=100, log_scale=None, show=True, incumb
             lower_curve = mean - std
             upper_curve = mean + std
 
+
+            # auto-detect whether to do log-scale
             if log_scale is None:
-                log_scale = param.log or (np.diff(grid).std() > 0.000001)
+                # take log value from ConfigSpace
+                log_scale = param.log
+
+                # auto-detect if log-scale might be better
+                if not log_scale and (np.diff(grid).std() > 0.000001):
+                    self.logger.info("Plotting this parameter, %s, in log-scale because auto-detected that it might be better." % param_name)
+                    log_scale = True
 
             # PLOT
             if log_scale:
-                if np.diff(grid).std() > 0.000001:
-                    self.logger.info("It might be better to plot this parameter '%s' in log-scale.", param_name)
                 ax.semilogx(grid, mean, 'b', label='predicted %s' % self._y_label)
             else:
                 ax.plot(grid, mean, 'b', label='predicted %s' % self._y_label)