scikit-learn-contrib · bellet · Apr 15, 2019 · Feb 4, 2019 · Feb 5, 2019 · Feb 6, 2019
diff --git a/doc/conf.py b/doc/conf.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import sys
 
 extensions = [
     'sphinx.ext.autodoc',
@@ -8,7 +9,8 @@
     'sphinx.ext.mathjax',
     'numpydoc',
     'sphinx_gallery.gen_gallery',
-    'sphinx.ext.doctest'
+    'sphinx.ext.doctest',
+    'sphinx.ext.intersphinx'
 ]
 
 templates_path = ['_templates']
@@ -39,3 +41,12 @@
 # Option to hide doctests comments in the documentation (like # doctest:
 # +NORMALIZE_WHITESPACE for instance)
 trim_doctest_flags = True
+
+# intersphinx configuration
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/{.major}'.format(
+        sys.version_info), None),
+    'numpy': ('https://docs.scipy.org/doc/numpy/', None),
+    'scipy': ('https://docs.scipy.org/doc/scipy/reference', None),
+    'scikit-learn': ('https://scikit-learn.org/stable/', None)
+}
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
@@ -148,8 +148,44 @@ tuples you're working with (pairs, triplets...). See the docstring of the
 `score` method of the estimator you use.
 
 
+Learning on pairs
+=================
+
+Some metric learning algorithms learn on pairs of samples. In this case, one
+should provide the algorithm with ``n_samples`` pairs of points, with a
+corresponding target containing ``n_samples`` values being either +1 or -1.
+These values indicate whether the given pairs are similar points or
+dissimilar points.
+
+
+.. _calibration:
+
+Thresholding
+------------
+In order to predict whether a new pair represents similar or dissimilar
+samples, we need to set a distance threshold, so that points closer (in the
+learned space) than this threshold are predicted as similar, and points further
+away are predicted as dissimilar. Several methods are possible for this
+thresholding.
+
+- **default**: Unless explicitely stated in the `fit` method documentation
+  of the estimator, the threshold is set with the method
+  `set_default_threshold` on the trainset.
+
+- **manual**: calling `set_threshold`, the user can
+  manually set the threshold to a particular value.
+
+- **calibrated**: calling `calibrate_threshold`, the user can
+  calibrate the threshold to achieve a particular score on a validation set,
+  the score being among the classical scores for classification (accuracy, f1
+  score...).
+
+
+See also: `sklearn.calibration`.
+
+
 Algorithms
-==================
+==========
 
 ITML
 ----
@@ -192,39 +228,6 @@ programming.
     .. [2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/
        itml/
 
-
-LSML
-----
-
-`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared
-Residual
-
-.. topic:: Example Code:
-
-::
-
-    from metric_learn import LSML
-
-    quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]],
-                   [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]],
-                   [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]],
-                   [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]]
-
-    # we want to make closer points where the first feature is close, and
-    # further if the second feature is close
-
-    lsml = LSML()
-    lsml.fit(quadruplets)
-
-.. topic:: References:
-
-    .. [1] Liu et al.
-       "Metric Learning from Relative Comparisons by Minimizing Squared
-       Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf
-
-    .. [2] Adapted from https://gist.github.com/kcarnold/5439917
-
-
 SDML
 ----
 
@@ -343,3 +346,46 @@ method. However, it is one of the earliest and a still often cited technique.
         -with-side-information.pdf>`_ Xing, Jordan, Russell, Ng.
   .. [2] Adapted from Matlab code `here <http://www.cs.cmu
      .edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz>`_.
+
+Learning on quadruplets
+=======================
+
+A type of information even weaker than pairs is information about relative
+comparisons between pairs. The user should provide the algorithm with a
+quadruplet of points, where the two first points are closer than the two
+last points. No target vector (``y``) is needed, since the supervision is
+already in the order that points are given in the quadruplet.
+
+Algorithms
+==========
+
+LSML
+----
+
+`LSML`: Metric Learning from Relative Comparisons by Minimizing Squared
+Residual
+
+.. topic:: Example Code:
+
+::
+
+    from metric_learn import LSML
+
+    quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]],
+                   [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]],
+                   [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]],
+                   [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]]
+
+    # we want to make closer points where the first feature is close, and
+    # further if the second feature is close
+
+    lsml = LSML()
+    lsml.fit(quadruplets)
+
+.. topic:: References:
+
+    .. [1] Liu et al.
+       "Metric Learning from Relative Comparisons by Minimizing Squared
+       Residual". ICDM 2012. http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf
+
+    .. [2] Adapted from https://gist.github.com/kcarnold/5439917