
[WIP] OASIS algorithm implementation #330

Open: wants to merge 146 commits into base: master
Changes from 1 commit (of 146 commits)
8fc975c
Add venv to gitignore
Sep 13, 2021
a3b4d6e
Create yml draft
mvargas33 Sep 15, 2021
bc8c7a8
Update yml
mvargas33 Sep 15, 2021
1ec62a2
Update 2
mvargas33 Sep 15, 2021
dac8414
Update 3
mvargas33 Sep 15, 2021
faaf5d7
YML: Only Python3 versions
mvargas33 Sep 15, 2021
0456111
Add Codecov to CI
mvargas33 Sep 15, 2021
e6edd75
Mirroring actual yml from metric_learning repo
mvargas33 Sep 15, 2021
c3bdde2
Fix codecov
mvargas33 Sep 15, 2021
0a0c9e8
Fix old scikit learn
mvargas33 Sep 15, 2021
db1bde0
Update yml
mvargas33 Sep 15, 2021
8046281
Remove 3.9 from compatibility
mvargas33 Sep 15, 2021
09fdd03
Fixed issue with sklearn 0.20
mvargas33 Sep 15, 2021
ab3dc5f
Delete comments, and unnecesary workflow_dispatch
mvargas33 Sep 16, 2021
d0a7a24
Merge branch 'master' of https://github.com/mvargas33/metric-learn
Sep 16, 2021
cdbefa7
Merge branch 'scikit-learn-contrib:master' into feat-bilinear
mvargas33 Sep 17, 2021
a353e70
FIrst draft of bilinear mixin
mvargas33 Sep 17, 2021
889f12a
Fix score_pairs
mvargas33 Sep 17, 2021
0495338
Two implementations for score_pairs
mvargas33 Sep 17, 2021
4285b56
Generalized toy tests
mvargas33 Sep 17, 2021
9ff9617
Handmade tests incorporated
mvargas33 Sep 17, 2021
c271d06
Oasis draft v0.1
Sep 20, 2021
eec5970
Oasis draft v0.2
Sep 20, 2021
ce08910
OASIS draft v0.3
Sep 20, 2021
e610384
Fix identation
Sep 21, 2021
67fe603
Fix identation for bilinear
Sep 21, 2021
b5007c9
Merge branch 'feat-bilinear' into feat-oasis
Sep 21, 2021
d34cfcd
Add performance test to choose between two methods for bilinear calc
Sep 21, 2021
d2179d6
Found an efficient way to compute Bilinear Sim for n pairs
Sep 22, 2021
16bd667
Update method's descriptions
Sep 22, 2021
adbdd85
Following the correct testing structure
Sep 22, 2021
4240738
Fix identation
Sep 22, 2021
25526e9
Add more tests. Fix 4 to 2 identation
Sep 23, 2021
39a1c76
Minor flake8 fix
Sep 23, 2021
edf64c5
Commented each test
Sep 23, 2021
7304ce5
All tests have been generalized
Sep 23, 2021
97c1fcc
Fix flake8 identation
Sep 23, 2021
3e6adb7
Minor details
Sep 23, 2021
ea74de7
Merge branch 'feat-bilinear' into feat-oasis
Sep 24, 2021
a139760
Fit OASIS into current package structure
Sep 24, 2021
9d9023b
Convention: score_pairs returns -1*bilinear to avoid breaking classif…
Sep 24, 2021
42e82c2
Add sanity check for OASIS. The more the triplets, the best the score…
Sep 24, 2021
0b80c5c
Classification with sim=0 will be -1
Sep 27, 2021
529b7d3
Indices management. Tests to be developed yet
Sep 27, 2021
7843c0b
Expected warning silenced
Sep 27, 2021
5c345eb
Tests needs a -1, as score_pairs is -1*bilinearSim
Sep 27, 2021
997abd0
max_iter is now n_iter
Sep 27, 2021
4bdd786
Add custom M initialization
Sep 27, 2021
1b67f8f
Base draft test for _get_random_indices. To be improved
Sep 28, 2021
ab77a05
Generalized test_indices_funct test
Sep 28, 2021
2c31cae
Fix identation
Sep 28, 2021
42bf565
Add OASIS Supervised.
Sep 28, 2021
d59ca9d
Patch an SCML test
Sep 28, 2021
7c92493
Test Iris with OASIS_Supervised
Sep 28, 2021
11a383e
Add oasis to library export
Sep 29, 2021
caa7ade
Add Gridserach example for OASIS
Sep 29, 2021
6896240
Moved params to constructor
Sep 29, 2021
0e8938b
Add experiment of random_State
Sep 29, 2021
efb99b1
Tests consistency in random_state
Sep 30, 2021
09f901e
Add an output consistency test. Use parametrize
Sep 30, 2021
09630a0
Add another test regarding random_state
Sep 30, 2021
4b7cdec
Remove 3.9 from compatibility
mvargas33 Sep 15, 2021
0147c0c
FIrst draft of bilinear mixin
mvargas33 Sep 17, 2021
ec09f59
Fix score_pairs
mvargas33 Sep 17, 2021
ec49397
Two implementations for score_pairs
mvargas33 Sep 17, 2021
2f3c3e1
Generalized toy tests
mvargas33 Sep 17, 2021
c21d283
Handmade tests incorporated
mvargas33 Sep 17, 2021
dbe2a7a
Fix identation for bilinear
Sep 21, 2021
ee5c5ee
Add performance test to choose between two methods for bilinear calc
Sep 21, 2021
9a10e06
Found an efficient way to compute Bilinear Sim for n pairs
Sep 22, 2021
b1edc46
Update method's descriptions
Sep 22, 2021
ae562e6
Following the correct testing structure
Sep 22, 2021
7ebc026
Fix identation
Sep 22, 2021
1d752f7
Add more tests. Fix 4 to 2 identation
Sep 23, 2021
45c9b97
Minor flake8 fix
Sep 23, 2021
407f910
Commented each test
Sep 23, 2021
80c9085
All tests have been generalized
Sep 23, 2021
90ac550
Fix flake8 identation
Sep 23, 2021
68eeda9
Minor details
Sep 23, 2021
c47797c
Remove 3.9 from compatibility
mvargas33 Sep 15, 2021
e07b11a
First draft of refactoring BaseMetricLearner and Mahalanobis Learner
Oct 1, 2021
f06c49d
Modify _get_random_indices as discussed
Oct 5, 2021
a029549
Minor code style fix
Oct 5, 2021
8aaa37c
Move _to_index_points to _utils. Used by OASIS and SCML
Oct 5, 2021
e9d9d40
move _get_random_indices tp _util. Update OASIS and tests
Oct 5, 2021
dc6710f
move _initialize_sim_bilinear to _utils. Fix Oasis and tests.
Oct 5, 2021
8210acd
Avoid warning related to score_pairs deprecation in tests of pair_cal…
Oct 6, 2021
11b5df6
Minor fix
Oct 6, 2021
06b7131
Replaced score_pairs with pair_distance in tests
Oct 6, 2021
d5cb8b4
Replace score_pairs with pair_distance inb docs.
Oct 6, 2021
b88215f
Refactor sim_bilinear init
Oct 7, 2021
59e44ae
Merge custom_order with random_indices
Oct 7, 2021
cb4bbf5
Deleted unnecesary getter. All params are public
Oct 7, 2021
1709e2c
Changed Frobenius norm for np.linalg.norm
Oct 8, 2021
2f61e7b
Fix weird commit
Oct 8, 2021
9dd38aa
Fix weird commit
Oct 8, 2021
5f68ed2
Update classifiers to use pair_similarity
Oct 8, 2021
3d6450b
Updated rst docs
Oct 8, 2021
7bce493
Fix identation
Oct 8, 2021
7e6584a
Update docs of score_pairs, get_metric
Oct 11, 2021
0b58f45
Add deprecation Test. Fix identation
Oct 11, 2021
d4d3a9c
Merge branch 'master' into score-deprecation
Oct 11, 2021
d27bdf5
Merge branch 'score-deprecation' into feat-bilinear
Oct 11, 2021
78a205c
Refactor to use pair_similarity instead of score_pairs
Oct 11, 2021
fd9d7c3
Draft of OASIS docs
Oct 12, 2021
2bb9171
Add more docs to OASIS
Oct 13, 2021
d4cc32c
Add OASIS source code docs
Oct 13, 2021
dde3576
Add more testing. Test refactor TBD
Oct 13, 2021
3020110
Tests are now parametrized
Oct 13, 2021
a125b09
Merge branch 'feat-bilinear' into feat-oasis. Fix score_pairs in oasis
Oct 13, 2021
ca6f839
Merge branch 'master' into feat-oasis
mvargas33 Oct 13, 2021
2746668
Add bilinear in introduction
Oct 15, 2021
920e504
Minor comment on use case
Oct 15, 2021
7a24319
More changes to sueprvised
Oct 15, 2021
2f8ee76
Changes in weakly Supervised
Oct 15, 2021
aa04d79
Merge branch 'feat-bilinear' into feat-oasis
Oct 15, 2021
60c88a6
Merge remote-tracking branch 'upstream/master' into score-deprecation
Oct 19, 2021
8c55970
Fixed changes requested 1
Oct 19, 2021
787a8d1
Fixed changes requested 2
Oct 19, 2021
e14f956
Add equivalence test, p_dist == p_score
Oct 19, 2021
0941a32
Fix tests and identation.
Oct 19, 2021
b019d85
Fixed changes requested 3
Oct 20, 2021
74df897
Fix identation
Oct 20, 2021
c62a4e7
Last requested changes
Oct 21, 2021
526e4ba
Merge branch 'score-deprecation' into feat-bilinear
Oct 21, 2021
2199724
Replaced pair_similarity for paiir_score
Oct 21, 2021
249e0fe
Last small detail
Oct 21, 2021
80f31ba
Merge branch 'score-deprecation' into feat-bilinear
Oct 21, 2021
8df44a4
Merge remote-tracking branch 'upstream/master' into feat-bilinear
Oct 26, 2021
eef13bb
Classifiers only test classifiers methods now. + Standard doctrings now.
Oct 26, 2021
b952af0
Work in tests. More comments. Some refactors
Oct 26, 2021
7cc0d5e
Learner lists for M and B learners. Separated test by kind. Mock clas…
Oct 27, 2021
5f6bdc2
Moved mocks to test_utils.py, then refactor test_bilinear_mixin.py
Oct 27, 2021
100a05d
Merge branch 'master' into feat-bilinear
Nov 3, 2021
f69391b
Merge branch 'feat-bilinear' into feat-oasis
Nov 3, 2021
d9d4584
Removed custom order. Optimize OASIS loop
Nov 3, 2021
190b9c0
test bilinear init parametrized. Fix partial_fit. Sanity check test w…
Nov 3, 2021
5823b39
Moved OASIS tests to metric_learn_test.py
Nov 8, 2021
447935e
Add OASIS to all bilinear, general tests. Change class inheritance to…
Nov 8, 2021
d86cfa0
Add covariance equivalence test at bilinear init. Fix indentation.
Nov 8, 2021
3bf5eae
Resolved observations in interoduction.rst
Nov 8, 2021
acfd54b
Resolved all observations for supervised.rst and weakly_s.rst
Nov 8, 2021
69bd9fe
Spellcheck
Nov 9, 2021
ade34cc
Moved common test to test_base_metric.py . Refactor preprocessor test…
Nov 9, 2021
16f1bd7
Merge branch 'feat-bilinear' into feat-oasis
Nov 9, 2021
cd30564
Adding Oasis example: WIP
Nov 18, 2021
Fixed changes requested 2
mvargas33 committed Oct 19, 2021
commit 787a8d16df04e05f176da61d1ad6d668872d7300
10 changes: 5 additions & 5 deletions doc/supervised.rst
@@ -82,18 +82,18 @@ array([0.49627072, 3.65287282, 6.06079877])
>>> metric_fun([3.5, 3.6], [5.6, 2.4])
0.4962707194621285

- Alternatively, you can use `pair_similarity` to return the **score** between
- Alternatively, you can use `pair_score` to return the **score** between
pairs of points, the larger the **score**, the more similar the pair
and vice-versa. For Mahalanobis learners, it is equal to the opposite
of the distance.

>>> score = nca.pair_similarity([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]])
>>> score = nca.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]])
>>> score
array([-0.49627072, -3.65287282, -6.06079877])

This is useful because `pair_similarity` matches the **score** semantic of
scikit-learn's `Classification metrics
<https://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics>`_.
This is useful because `pair_score` matches the **score** semantic of
scikit-learn's `Classification metrics
<https://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics>`_.

.. note::

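The convention documented in the hunk above, that for Mahalanobis learners `pair_score` returns the opposite of `pair_distance`, can be sketched in plain NumPy. The helper names and the matrix `M` below are illustrative stand-ins, not the metric-learn implementation:

```python
import numpy as np

def pair_distance(pairs, M):
    """Mahalanobis pseudo-distance sqrt((x - y)^T M (x - y)) for each pair."""
    diff = pairs[:, 0] - pairs[:, 1]             # shape (n_pairs, n_features)
    return np.sqrt(np.einsum('ij,jk,ik->i', diff, M, diff))

def pair_score(pairs, M):
    """Score semantic: the larger, the more similar, so negate the distance."""
    return -pair_distance(pairs, M)

M = np.eye(2)                                    # identity M -> Euclidean distance
pairs = np.array([[[3.5, 3.6], [5.6, 2.4]],
                  [[1.2, 4.2], [2.1, 6.4]]])
print(pair_score(pairs, M))                      # opposite of the distances
```

With `M` learned instead of fixed to the identity, this is exactly the score/distance duality the docs describe.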
12 changes: 6 additions & 6 deletions doc/weakly_supervised.rst
@@ -175,16 +175,16 @@ array([7.27607365, 0.88853014])
>>> metric_fun([3.5, 3.6, 5.2], [5.6, 2.4, 6.7])
7.276073646278203

- Alternatively, you can use `pair_similarity` to return the **score** between
- Alternatively, you can use `pair_score` to return the **score** between
pairs of points, the larger the **score**, the more similar the pair
and vice-versa. For Mahalanobis learners, it is equal to the opposite
of the distance.

>>> score = mmc.pair_similarity([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]])
>>> score = mmc.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]])
>>> score
array([-0.49627072, -3.65287282, -6.06079877])

This is useful because `pair_similarity` matches the **score** semantic of
This is useful because `pair_score` matches the **score** semantic of
scikit-learn's `Classification metrics
<https://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics>`_.

@@ -356,7 +356,7 @@ returns the `sklearn.metrics.roc_auc_score` (which is threshold-independent).
.. note::
See :ref:`fit_ws` for more details on metric learners functions that are
not specific to learning on pairs, like `transform`, `pair_distance`,
`pair_similarity`, `get_metric` and `get_mahalanobis_matrix`.
`pair_score`, `get_metric` and `get_mahalanobis_matrix`.

Algorithms
----------
@@ -703,7 +703,7 @@ of triplets that have the right predicted ordering.
.. note::
See :ref:`fit_ws` for more details on metric learners functions that are
not specific to learning on pairs, like `transform`, `pair_distance`,
`pair_similarity`, `get_metric` and `get_mahalanobis_matrix`.
`pair_score`, `get_metric` and `get_mahalanobis_matrix`.



@@ -871,7 +871,7 @@ of quadruplets have the right predicted ordering.
.. note::
See :ref:`fit_ws` for more details on metric learners functions that are
not specific to learning on pairs, like `transform`, `pair_distance`,
`pair_similarity`, `get_metric` and `get_mahalanobis_matrix`.
`pair_score`, `get_metric` and `get_mahalanobis_matrix`.



81 changes: 43 additions & 38 deletions metric_learn/base_metric.py
@@ -41,8 +41,10 @@ def score_pairs(self, pairs):
Parameters
----------
pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features)
3D array of pairs.
pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
3D array of pairs to score, with each row corresponding to two points,
or 2D array of indices of pairs if the metric learner uses a
preprocessor.
Returns
-------
@@ -52,27 +54,29 @@ def score_pairs(self, pairs):
See Also
--------
get_metric : a method that returns a function to compute the metric between
two points. The difference with `score_pairs` is that it works on two
1D arrays and cannot use a preprocessor. Besides, the returned function
is independent of the metric learner and hence is not modified if the
metric learner is.
two points. The difference with `pair_score` and `pair_distance` is
that it works on two 1D arrays and cannot use a preprocessor. Besides,
the returned function is independent of the metric learner and hence is
not modified if the metric learner is.
"""

@abstractmethod
def pair_similarity(self, pairs):
def pair_score(self, pairs):
"""
.. versionadded:: 0.7.0 Compute the similarity score between pairs
Returns the similarity score between pairs. Depending on the algorithm,
this method can return the learned similarity score between pairs,
or the inverse of the distance learned between two pairs. The more the
score, the more similar the pairs. All learners have access to this
Returns the similarity score between pairs of points. Depending on the
algorithm, this method can return the learned similarity score between
pairs, or the opposite of the distance learned between pairs. The larger
the score, the more similar the pair. All learners have access to this
method.
Parameters
----------
pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features)
3D array of pairs.
pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
3D array of pairs to score, with each row corresponding to two points,
or 2D array of indices of pairs if the metric learner uses a
preprocessor.
Returns
-------
@@ -82,7 +86,7 @@ def pair_similarity(self, pairs):
See Also
--------
get_metric : a method that returns a function to compute the metric between
two points. The difference with `pair_similarity` is that it works on two
two points. The difference with `pair_score` is that it works on two
1D arrays and cannot use a preprocessor. Besides, the returned function
is independent of the metric learner and hence is not modified if the
metric learner is.
@@ -91,17 +95,18 @@ def pair_similarity(self, pairs):
@abstractmethod
def pair_distance(self, pairs):
"""
.. versionadded:: 0.7.0 Compute the distance score between pairs
.. versionadded:: 0.7.0 Compute the distance between pairs
Returns the distance score between pairs. For Mahalanobis learners, it
returns the pseudo-distance between pairs. It is not available for
learners that does not learn a distance or pseudo-distance, an error
will be shown instead.
Returns the (pseudo) distance between pairs, when available. For metric
learners that do not learn a (pseudo) distance, an error is thrown
instead.
Parameters
----------
pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features)
3D array of pairs.
pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
3D array of pairs to score, with each row corresponding to two points,
or 2D array of indices of pairs if the metric learner uses a
preprocessor.
Returns
-------
@@ -170,10 +175,10 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic',

@abstractmethod
def get_metric(self):
"""Returns a function that takes as input two 1D arrays and outputs the
learned metric score on these two points. Depending on the algorithm, it
can return the distance or similarity function between pairs. It always
returns what the specific algorithm learns.
"""Returns a function that takes as input two 1D arrays and outputs
the value of the learned metric on these two points. Depending on the
algorithm, it can return a distance or a score function between pairs.
It always returns what the specific algorithm learns.
This function will be independent from the metric learner that learned it
(it will not be modified if the initial metric learner is modified),
@@ -206,13 +211,13 @@ def get_metric(self):
See Also
--------
pair_distance : a method that returns the distance score between several
pair_distance : a method that returns the distance between several
pairs of points. Unlike `get_metric`, this is a method of the metric
learner and therefore can change if the metric learner changes. Besides,
it can use the metric learner's preprocessor, and works on concatenated
arrays.
pair_similarity : a method that returns the similarity score between
pair_score : a method that returns the similarity score between
several pairs of points. Unlike `get_metric`, this is a method of the
metric learner and therefore can change if the metric learner changes.
Besides, it can use the metric learner's preprocessor, and works on
@@ -265,7 +270,7 @@ def score_pairs(self, pairs):
.. warning::
This method will be removed in 0.8.0. Please refer to `pair_distance`
or `pair_similarity`. This change will occur in order to add learners
or `pair_score`. This change will occur in order to add learners
that don't necessarily learn a Mahalanobis distance.
Returns the learned Mahalanobis distance between pairs.
@@ -302,14 +307,14 @@ def score_pairs(self, pairs):
that describes Mahalanobis Distances.
"""
dpr_msg = ("score_pairs will be deprecated in release 0.7.0. "
"Use pair_similarity to compute similarities, or "
"Use pair_score to compute similarity scores, or "
"pair_distances to compute distances.")
warnings.warn(dpr_msg, category=FutureWarning)
return self.pair_distance(pairs)

def pair_similarity(self, pairs):
def pair_score(self, pairs):
"""
Returns the inverse of the learned Mahalanobis distance between pairs.
Returns the opposite of the learned Mahalanobis distance between pairs.
Parameters
----------
@@ -321,12 +326,12 @@ def pair_similarity(self, pairs):
Returns
-------
scores : `numpy.ndarray` of shape=(n_pairs,)
The inverse of the learned Mahalanobis distance for every pair.
The opposite of the learned Mahalanobis distance for every pair.
See Also
--------
get_metric : a method that returns a function to compute the metric between
two points. The difference with `pair_similarity` is that it works on two
two points. The difference with `pair_score` is that it works on two
1D arrays and cannot use a preprocessor. Besides, the returned function
is independent of the metric learner and hence is not modified if the
metric learner is.
@@ -517,7 +522,7 @@ def decision_function(self, pairs):
pairs = check_input(pairs, type_of_inputs='tuples',
preprocessor=self.preprocessor_,
estimator=self, tuple_size=self._tuple_size)
return self.pair_similarity(pairs)
return self.pair_score(pairs)

def score(self, pairs, y):
"""Computes score of pairs similarity prediction.
@@ -787,8 +792,8 @@ def decision_function(self, triplets):
triplets = check_input(triplets, type_of_inputs='tuples',
preprocessor=self.preprocessor_,
estimator=self, tuple_size=self._tuple_size)
return (self.pair_similarity(triplets[:, :2]) -
self.pair_similarity(triplets[:, [0, 2]]))
return (self.pair_score(triplets[:, :2]) -
self.pair_score(triplets[:, [0, 2]]))

def score(self, triplets):
"""Computes score on input triplets.
@@ -872,8 +877,8 @@ def decision_function(self, quadruplets):
quadruplets = check_input(quadruplets, type_of_inputs='tuples',
preprocessor=self.preprocessor_,
estimator=self, tuple_size=self._tuple_size)
return (self.pair_similarity(quadruplets[:, :2]) -
self.pair_similarity(quadruplets[:, 2:]))
return (self.pair_score(quadruplets[:, :2]) -
self.pair_score(quadruplets[:, 2:]))

def score(self, quadruplets):
"""Computes score on input quadruplets
4 changes: 2 additions & 2 deletions test/test_base_metric.py
@@ -284,12 +284,12 @@ def test_score_pairs_warning(estimator, build_dataset):
model = clone(estimator)
set_random_state(model)

# we fit the metric learner on it and then we call score_apirs on some
# We fit the metric learner on it and then we call score_pairs on some
# points
model.fit(*remove_y(model, input_data, labels))

msg = ("score_pairs will be deprecated in release 0.7.0. "
"Use pair_similarity to compute similarities, or "
"Use pair_score to compute similarity scores, or "
"pair_distances to compute distances.")
with pytest.warns(FutureWarning) as raised_warning:
score = model.score_pairs([[X[0], X[1]], ])
20 changes: 11 additions & 9 deletions test/test_pairs_classifiers.py
@@ -49,14 +49,14 @@ def test_predict_monotonous(estimator, build_dataset,
pairs_train, pairs_test, y_train, y_test = train_test_split(input_data,
labels)
estimator.fit(pairs_train, y_train)
distances = estimator.pair_distance(pairs_test)
scores = estimator.pair_score(pairs_test)
predictions = estimator.predict(pairs_test)
min_dissimilar = np.min(distances[predictions == -1])
max_similar = np.max(distances[predictions == 1])
assert max_similar <= min_dissimilar
separator = np.mean([min_dissimilar, max_similar])
assert (predictions[distances > separator] == -1).all()
assert (predictions[distances < separator] == 1).all()
max_dissimilar = np.max(scores[predictions == -1])
min_similar = np.min(scores[predictions == 1])
assert max_dissimilar <= min_similar
separator = np.mean([max_dissimilar, min_similar])
assert (predictions[scores < separator] == -1).all()
assert (predictions[scores > separator] == 1).all()
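The monotonicity property this test checks (every pair predicted similar scores above every pair predicted dissimilar) follows directly from thresholding scores. A minimal sketch, with `predict_from_scores` as a hypothetical stand-in for the classifier's `predict`:

```python
import numpy as np

def predict_from_scores(scores, threshold=0.0):
    """Pairs scoring strictly above the threshold are predicted similar (+1)."""
    return np.where(scores > threshold, 1, -1)

scores = np.array([-3.2, -0.4, 0.7, 2.5])
preds = predict_from_scores(scores)
# Monotonicity: every +1 prediction has a higher score than every -1 one.
assert scores[preds == 1].min() > scores[preds == -1].max()
print(preds)  # [-1 -1  1  1]
```

Note that a score exactly at the threshold is predicted -1, matching the convention adopted earlier in the PR ("Classification with sim=0 will be -1").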


@pytest.mark.parametrize('with_preprocessor', [True, False])
@@ -65,15 +65,17 @@ def test_predict_monotonous(estimator, build_dataset,
def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset,
with_preprocessor):
"""Test that a NotFittedError is raised if someone tries to use
pair_distance, decision_function, get_metric, transform or
pair_score, score_pairs, decision_function, get_metric, transform or
get_mahalanobis_matrix on input data and the metric learner
has not been fitted."""
input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
estimator = clone(estimator)
estimator.set_params(preprocessor=preprocessor)
set_random_state(estimator)
with pytest.raises(NotFittedError): # Remove in 0.8.0
estimator.score_pairs(input_data)
with pytest.raises(NotFittedError):
estimator.pair_distance(input_data)
estimator.pair_score(input_data)
with pytest.raises(NotFittedError):
estimator.decision_function(input_data)
with pytest.raises(NotFittedError):
11 changes: 10 additions & 1 deletion test/test_sklearn_compat.py
@@ -147,8 +147,17 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor):

pairs = np.array([[X[0], X[1]], [X[0], X[2]]])
pairs_variants, _ = generate_array_like(pairs)
msg = ""
# Todo in 0.7.0: Change 'msg' for the message that says "This learner does
# not have pair_distance"
for pairs_variant in pairs_variants:
estimator.pair_distance(pairs_variant)
estimator.pair_score(pairs_variant) # All learners have pair_score
# But not all of them have pair_distance
with pytest.raises(Exception) as raised_exception:
estimator.pair_distance(pairs_variant)
if raised_exception is not None:
assert msg == raised_exception.value.args[0]



@pytest.mark.parametrize('with_preprocessor', [True, False])