From 6b789d39b1f9859a7efac9bf7e6b539bb584acbd Mon Sep 17 00:00:00 2001
From: RobinVogel
Date: Thu, 14 Nov 2019 17:58:23 +0100
Subject: [PATCH 1/8] maj

---
 test_components.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 test_components.py

diff --git a/test_components.py b/test_components.py
new file mode 100644
index 00000000..6edf73db
--- /dev/null
+++ b/test_components.py
@@ -0,0 +1,21 @@
+import numpy as np
+import pytest
+from numpy.linalg import LinAlgError
+from scipy.stats import ortho_group
+
+rng = np.random.RandomState(42)
+
+# an orthonormal matrix useful for creating matrices with given
+# eigenvalues:
+P = ortho_group.rvs(7, random_state=rng)
+
+# matrix with a determinant still high but which should be considered as a
+# non-definite matrix (to check we don't test the definiteness with the
+# determinant which is a bad strategy)
+M = np.diag([1e5, 1e5, 1e5, 1e5, 1e5, 1e5, 1e-20])
+M = P.dot(M).dot(P.T)
+assert np.abs(np.linalg.det(M)) > 10
+assert np.linalg.slogdet(M)[1] > 1  # (just to show that the computed
+# determinant is far from null)
+with pytest.raises(LinAlgError) as err_msg:
+  np.linalg.cholesky(M)

From 275c69a8493dfba872aaf2db6dbaad1acbd7c4e0 Mon Sep 17 00:00:00 2001
From: RobinVogel
Date: Thu, 28 Nov 2019 17:01:07 +0100
Subject: [PATCH 2/8] added fit checks

---
 metric_learn/base_metric.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index 6feccc72..f238ccd2 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -215,6 +215,7 @@ def score_pairs(self, pairs):
     :ref:`mahalanobis_distances` : The section of the project documentation
       that describes Mahalanobis Distances.
     """
+    check_is_fitted(self, 'components_')
     pairs = check_input(pairs, type_of_inputs='tuples',
                         preprocessor=self.preprocessor_,
                         estimator=self, tuple_size=2)
@@ -240,12 +241,15 @@ def transform(self, X):
     X_embedded : `numpy.ndarray`, shape=(n_samples, n_components)
       The embedded data points.
     """
+    check_is_fitted(self, 'components_')
+
     X_checked = check_input(X, type_of_inputs='classic', estimator=self,
                             preprocessor=self.preprocessor_,
                             accept_sparse=True)
     return X_checked.dot(self.components_.T)

   def get_metric(self):
+    check_is_fitted(self, 'components_')
     components_T = self.components_.T.copy()

     def metric_fun(u, v, squared=False):
@@ -285,6 +289,7 @@ def metric(self):
     """Deprecated. Will be removed in v0.6.0. Use `get_mahalanobis_matrix`
     instead"""
     # TODO: remove this method in version 0.6.0
+    check_is_fitted(self, 'components_')
     warnings.warn(("`metric` is deprecated since version 0.5.0 and will be "
                    "removed in 0.6.0. Use `get_mahalanobis_matrix` instead."),
                   DeprecationWarning)
@@ -298,6 +303,7 @@ def get_mahalanobis_matrix(self):
     M : `numpy.ndarray`, shape=(n_features, n_features)
       The copy of the learned Mahalanobis matrix.
     """
+    check_is_fitted(self, 'components_')
     return self.components_.T.dot(self.components_)


@@ -357,6 +363,7 @@ def decision_function(self, pairs):
     y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,)
       The predicted decision function value for each pair.
     """
+    check_is_fitted(self, 'components_')
     pairs = check_input(pairs, type_of_inputs='tuples',
                         preprocessor=self.preprocessor_,
                         estimator=self, tuple_size=self._tuple_size)
@@ -628,6 +635,7 @@ def decision_function(self, quadruplets):
     decision_function : `numpy.ndarray` of floats, shape=(n_constraints,)
       Metric differences.
""" + check_is_fitted(self, 'components_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) From 1c28b5663dc5f6e9d2f002bda93c4dc9072471cd Mon Sep 17 00:00:00 2001 From: RobinVogel Date: Thu, 28 Nov 2019 17:02:31 +0100 Subject: [PATCH 3/8] maj --- test_components.py | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 test_components.py diff --git a/test_components.py b/test_components.py deleted file mode 100644 index 6edf73db..00000000 --- a/test_components.py +++ /dev/null @@ -1,21 +0,0 @@ -import numpy as np -import pytest -from numpy.linalg import LinAlgError -from scipy.stats import ortho_group - -rng = np.random.RandomState(42) - -# an orthonormal matrix useful for creating matrices with given -# eigenvalues: -P = ortho_group.rvs(7, random_state=rng) - -# matrix with a determinant still high but which should be considered as a -# non-definite matrix (to check we don't test the definiteness with the -# determinant which is a bad strategy) -M = np.diag([1e5, 1e5, 1e5, 1e5, 1e5, 1e5, 1e-20]) -M = P.dot(M).dot(P.T) -assert np.abs(np.linalg.det(M)) > 10 -assert np.linalg.slogdet(M)[1] > 1 # (just to show that the computed -# determinant is far from null) -with pytest.raises(LinAlgError) as err_msg: - np.linalg.cholesky(M) From 76ffccb9e578cf8220177de801a24451810a1b8d Mon Sep 17 00:00:00 2001 From: RobinVogel Date: Thu, 28 Nov 2019 17:09:15 +0100 Subject: [PATCH 4/8] Added checks that the function was fitted. --- metric_learn/base_metric.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index f238ccd2..707b9d8b 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -242,7 +242,6 @@ def transform(self, X): The embedded data points. """ check_is_fitted(self, 'components_') - X_checked = check_input(X, type_of_inputs='classic', estimator=self, preprocessor=self.preprocessor_, accept_sparse=True) From 340ac69aecfb8aace5376794edaa41f074deed1d Mon Sep 17 00:00:00 2001 From: RobinVogel Date: Thu, 28 Nov 2019 17:58:42 +0100 Subject: [PATCH 5/8] Wrote a semi-supervised-rca. --- metric_learn/rca.py | 92 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 2a9ab1e8..839cebcb 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -62,7 +62,7 @@ class RCA(MahalanobisMixin, TransformerMixin): Examples -------- - >>> from metric_learn import RCA_Supervised + >>> from metric_learn import RCA_SemiSupervised >>> from sklearn.datasets import load_iris >>> iris_data = load_iris() >>> X = iris_data['data'] @@ -108,7 +108,7 @@ def fit(self, X, chunks): Parameters ---------- - data : (n x d) data matrix + X : (n x d) data matrix Each row corresponds to a single instance chunks : (n,) array of ints When ``chunks[i] == -1``, point i doesn't belong to any chunklet. @@ -242,3 +242,91 @@ def fit(self, X, y, random_state='deprecated'): chunk_size=self.chunk_size, random_state=self.random_state) return RCA.fit(self, X, chunks) + + +class RCA_SemiSupervised(RCA): + """Semi-Supervised version of Relevant Components Analysis (RCA) + + `RCA_SemiSupervised` combines data in the form of chunks with + data in the form of labeled points that goes through the same + process as in `RCA_SemiSupervised`. 
+
+  Parameters
+  ----------
+  n_components : int or None, optional (default=None)
+    Dimensionality of reduced space (if None, defaults to dimension of X).
+
+  num_dims : Not used
+
+    .. deprecated:: 0.5.0
+      `num_dims` was deprecated in version 0.5.0 and will
+      be removed in 0.6.0. Use `n_components` instead.
+
+  num_chunks: int, optional
+
+  chunk_size: int, optional
+
+  preprocessor : array-like, shape=(n_samples, n_features) or callable
+    The preprocessor to call to get tuples from indices. If array-like,
+    tuples will be formed like this: X[indices].
+
+  random_state : int or numpy.RandomState or None, optional (default=None)
+    A pseudo random number generator object or a seed for it if int.
+    It is used to randomly sample constraints from labels.
+
+  Attributes
+  ----------
+  components_ : `numpy.ndarray`, shape=(n_components, n_features)
+    The learned linear transformation ``L``.
+  """
+
+  def __init__(self, num_dims='deprecated', n_components=None,
+               pca_comps='deprecated', num_chunks=100, chunk_size=2,
+               preprocessor=None, random_state=None):
+    """Initialize the semi-supervised version of `RCA`."""
+    RCA.__init__(self, num_dims=num_dims, n_components=n_components,
+                 pca_comps=pca_comps, preprocessor=preprocessor)
+    self.num_chunks = num_chunks
+    self.chunk_size = chunk_size
+    self.random_state = random_state
+
+  def fit(self, X, y, X_u, chunks,
+          random_state='deprecated'):
+    """Create constraints from labels and learn the RCA model.
+    Needs num_chunks and chunk_size specified in constructor.
+
+    Parameters
+    ----------
+    X : (n x d) labeled data matrix
+      each row corresponds to a single instance
+    y : (n) data labels
+    X_u : (n x d) unlabeled data matrix
+    chunks : (n,) array of ints
+      When ``chunks[i] == -1``, point i doesn't belong to any chunklet.
+      When ``chunks[i] == j``, point i belongs to chunklet j.
+    random_state : Not used
+      .. deprecated:: 0.5.0
+        `random_state` in the `fit` function was deprecated in version 0.5.0
+        and will be removed in 0.6.0. Set `random_state` at initialization
+        instead (when instantiating a new `RCA_SemiSupervised` object).
+    """
+    if random_state != 'deprecated':
+      warnings.warn('"random_state" parameter in the `fit` function is '
+                    'deprecated. Set `random_state` at initialization '
+                    'instead (when instantiating a new `RCA_SemiSupervised` '
+                    'object).', DeprecationWarning)
+    else:
+      warnings.warn('As of v0.5.0, `RCA_SemiSupervised` now uses the '
+                    '`random_state` given at initialization to sample '
+                    'constraints, not the default `np.random` from the `fit` '
+                    'method, since this argument is now deprecated. '
+                    'This warning will disappear in v0.6.0.',
+                    ChangedBehaviorWarning)
+    X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
+    sup_chunks = Constraints(y).chunks(num_chunks=self.num_chunks,
+                                       chunk_size=self.chunk_size,
+                                       random_state=self.random_state)
+    X_tot = np.concatenate([X, X_u])
+    chunks_tot = np.concatenate([sup_chunks, chunks])
+
+    return RCA.fit(self, X_tot, chunks_tot)

From 36694f67c629022cbdc3b6c8c713c5ce3b5e44b6 Mon Sep 17 00:00:00 2001
From: RobinVogel
Date: Thu, 28 Nov 2019 18:50:07 +0100
Subject: [PATCH 6/8] added a very simple test

---
 metric_learn/__init__.py  |  5 +++--
 test/metric_learn_test.py | 13 +++++++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py
index b036ccfa..9a0268fa 100644
--- a/metric_learn/__init__.py
+++ b/metric_learn/__init__.py
@@ -8,7 +8,7 @@
 from .sdml import SDML, SDML_Supervised
 from .nca import NCA
 from .lfda import LFDA
-from .rca import RCA, RCA_Supervised
+from .rca import RCA, RCA_Supervised, RCA_SemiSupervised
 from .mlkr import MLKR
 from .mmc import MMC, MMC_Supervised

@@ -17,4 +17,5 @@
 __all__ = ['Constraints', 'Covariance', 'ITML', 'ITML_Supervised',
            'LMNN', 'LSML', 'LSML_Supervised', 'SDML',
            'SDML_Supervised', 'NCA', 'LFDA', 'RCA', 'RCA_Supervised',
-           'MLKR', 'MMC', 'MMC_Supervised', '__version__']
+           'RCA_SemiSupervised', 'MLKR', 'MMC', 'MMC_Supervised',
+           '__version__']

diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index f713a059..6a34402b 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -23,9 +23,9 @@
 from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC,
                           LSML_Supervised, ITML_Supervised, SDML_Supervised,
                           RCA_Supervised, MMC_Supervised, SDML, RCA, ITML,
-                          LSML)
+                          LSML, RCA_SemiSupervised)
 # Import this specially for testing.
-from metric_learn.constraints import wrap_pairs
+from metric_learn.constraints import wrap_pairs, Constraints
 from metric_learn.lmnn import _sum_outer_products


@@ -1136,6 +1136,15 @@ def test_changed_behaviour_warning_random_state(self):
       rca_supervised.fit(X, y)
     assert any(msg == str(wrn.message) for wrn in raised_warning)

+  def test_semi_supervised(self):
+    n = 100
+    X, y = make_classification(random_state=42, n_samples=2 * n)
+    rca_semisupervised = RCA_SemiSupervised(num_chunks=20)
+    cons = Constraints(y[n:])
+    chunks = cons.chunks(num_chunks=20)
+    rca_semisupervised.fit(X[:n], y[:n],
+                           X[n:], chunks)
+

 @pytest.mark.parametrize('num_dims', [None, 2])
 def test_deprecation_num_dims_rca(num_dims):

From 77fb53a3429bd956527bb5a3ef567ec0d780c474 Mon Sep 17 00:00:00 2001
From: RobinVogel
Date: Thu, 28 Nov 2019 18:55:28 +0100
Subject: [PATCH 7/8] typos

---
 metric_learn/base_metric.py | 7 -------
 metric_learn/rca.py         | 2 +-
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index 707b9d8b..6feccc72 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -215,7 +215,6 @@ def score_pairs(self, pairs):
     :ref:`mahalanobis_distances` : The section of the project documentation
       that describes Mahalanobis Distances.
     """
-    check_is_fitted(self, 'components_')
     pairs = check_input(pairs, type_of_inputs='tuples',
                         preprocessor=self.preprocessor_,
                         estimator=self, tuple_size=2)
@@ -241,14 +240,12 @@ def transform(self, X):
     X_embedded : `numpy.ndarray`, shape=(n_samples, n_components)
       The embedded data points.
""" - check_is_fitted(self, 'components_') X_checked = check_input(X, type_of_inputs='classic', estimator=self, preprocessor=self.preprocessor_, accept_sparse=True) return X_checked.dot(self.components_.T) def get_metric(self): - check_is_fitted(self, 'components_') components_T = self.components_.T.copy() def metric_fun(u, v, squared=False): @@ -288,7 +285,6 @@ def metric(self): """Deprecated. Will be removed in v0.6.0. Use `get_mahalanobis_matrix` instead""" # TODO: remove this method in version 0.6.0 - check_is_fitted(self, 'components_') warnings.warn(("`metric` is deprecated since version 0.5.0 and will be " "removed in 0.6.0. Use `get_mahalanobis_matrix` instead."), DeprecationWarning) @@ -302,7 +298,6 @@ def get_mahalanobis_matrix(self): M : `numpy.ndarray`, shape=(n_features, n_features) The copy of the learned Mahalanobis matrix. """ - check_is_fitted(self, 'components_') return self.components_.T.dot(self.components_) @@ -362,7 +357,6 @@ def decision_function(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted decision function value for each pair. """ - check_is_fitted(self, 'components_') pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) @@ -634,7 +628,6 @@ def decision_function(self, quadruplets): decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) Metric differences. """ - check_is_fitted(self, 'components_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 839cebcb..93292723 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -62,7 +62,7 @@ class RCA(MahalanobisMixin, TransformerMixin): Examples -------- - >>> from metric_learn import RCA_SemiSupervised + >>> from metric_learn import RCA_Supervised >>> from sklearn.datasets import load_iris >>> iris_data = load_iris() >>> X = iris_data['data'] From b3445c50a4242ef511ab4ee516017bcdeb7aac67 Mon Sep 17 00:00:00 2001 From: RobinVogel Date: Fri, 29 Nov 2019 11:48:11 +0100 Subject: [PATCH 8/8] test cov correction --- test/metric_learn_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 6a34402b..20c94f46 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -1144,6 +1144,8 @@ def test_semi_supervised(self): chunks = cons.chunks(num_chunks=20) rca_semisupervised.fit(X[:n], y[:n], X[n:], chunks) + rca_semisupervised.fit(X[:n], y[:n], + X[n:], chunks, random_state=42) @pytest.mark.parametrize('num_dims', [None, 2])